###Before ocntinuing shall check to add the int(CHR) from the python script…. and rerun all the samples.
###Libraries
rm(list = ls(all.names = TRUE))
#.libPaths("/data/botos/RLibs/")
#.libPaths("/data/botos/RLibs/")
#BiocManager::install("S4Vectors",update = TRUE,ask = FALSE,force = TRUE)
#install.packages("wesanderson",lib = .libPaths()[1])
library(wesanderson)
library(tidyr)
library(tidyverse)
── Attaching core tidyverse packages ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.0 ✔ purrr 1.0.1
✔ forcats 1.0.0 ✔ readr 2.1.4
✔ ggplot2 3.4.1 ✔ stringr 1.5.0
✔ lubridate 1.9.2 ✔ tibble 3.2.0
── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ purrr::flatten() masks jsonlite::flatten()
✖ dplyr::lag() masks stats::lag()
✖ jsonlite::validate() masks shiny::validate()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(RColorBrewer)
library(brew)
library(circlize)
========================================
circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/
If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
in R. Bioinformatics 2014.
This message can be suppressed by:
suppressPackageStartupMessages(library(circlize))
========================================
library(S4Vectors)
Loading required package: stats4
Loading required package: BiocGenerics
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:lubridate’:
intersect, setdiff, union
The following objects are masked from ‘package:dplyr’:
combine, intersect, setdiff, union
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep, grepl, intersect,
is.unsorted, lapply, Map, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which.max, which.min
Error in value[[3L]](cond) :
Package ‘S4Vectors’ version 0.32.4 cannot be unloaded:
Error in unloadNamespace(package) : namespace ‘S4Vectors’ is imported by ‘DelayedArray’, ‘IRanges’, ‘GenomicAlignments’, ‘GenomeInfoDb’, ‘BiocIO’, ‘SummarizedExperiment’, ‘restfulr’, ‘plyranges’, ‘rtracklayer’, ‘Biostrings’, ‘Rsamtools’, ‘AnnotationDbi’, ‘GenomicRanges’, ‘XVector’ so cannot be unloaded
##Load the data
files <- list.files(all.files = TRUE,path = "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti/",
pattern = "*.mt.disc.sam.cluster.summary.tsv",
recursive = TRUE,
full.names = TRUE)
#Sort the files by the number of output
files <- files[order(nchar(files))]
files
[1] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_017_E20_F03/WGS_017_E20_F03.mt.disc.sam.cluster.summary.tsv"
[2] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_021_E20_F02/WGS_021_E20_F02.mt.disc.sam.cluster.summary.tsv"
[3] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_024_E20_F02/WGS_024_E20_F02.mt.disc.sam.cluster.summary.tsv"
[4] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_025_E20_F02/WGS_025_E20_F02.mt.disc.sam.cluster.summary.tsv"
[5] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_038_E20_F07/WGS_038_E20_F07.mt.disc.sam.cluster.summary.tsv"
[6] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_040_E20_F02/WGS_040_E20_F02.mt.disc.sam.cluster.summary.tsv"
[7] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_042_E20_F02/WGS_042_E20_F02.mt.disc.sam.cluster.summary.tsv"
[8] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_043_E20_F02/WGS_043_E20_F02.mt.disc.sam.cluster.summary.tsv"
[9] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_047_E20_F02/WGS_047_E20_F02.mt.disc.sam.cluster.summary.tsv"
[10] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_050_E20_F02/WGS_050_E20_F02.mt.disc.sam.cluster.summary.tsv"
[11] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_050_E20_F03/WGS_050_E20_F03.mt.disc.sam.cluster.summary.tsv"
[12] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_060_E20_F06/WGS_060_E20_F06.mt.disc.sam.cluster.summary.tsv"
[13] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_063_E20_F02/WGS_063_E20_F02.mt.disc.sam.cluster.summary.tsv"
[14] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_080_E20_F02/WGS_080_E20_F02.mt.disc.sam.cluster.summary.tsv"
[15] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_084_E20_F02/WGS_084_E20_F02.mt.disc.sam.cluster.summary.tsv"
[16] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_088_E20_F05/WGS_088_E20_F05.mt.disc.sam.cluster.summary.tsv"
[17] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_088_E20_F06/WGS_088_E20_F06.mt.disc.sam.cluster.summary.tsv"
[18] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_095_E20_F11/WGS_095_E20_F11.mt.disc.sam.cluster.summary.tsv"
[19] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_115_E20_F03/WGS_115_E20_F03.mt.disc.sam.cluster.summary.tsv"
[20] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_132_E20_F10/WGS_132_E20_F10.mt.disc.sam.cluster.summary.tsv"
[21] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_134_E20_F02/WGS_134_E20_F02.mt.disc.sam.cluster.summary.tsv"
[22] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_211_E20_F02/WGS_211_E20_F02.mt.disc.sam.cluster.summary.tsv"
[23] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_220_E20_F07/WGS_220_E20_F07.mt.disc.sam.cluster.summary.tsv"
[24] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_230_E20_F06/WGS_230_E20_F06.mt.disc.sam.cluster.summary.tsv"
[25] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_240_E20_F05/WGS_240_E20_F05.mt.disc.sam.cluster.summary.tsv"
[26] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_246_E20_F05/WGS_246_E20_F05.mt.disc.sam.cluster.summary.tsv"
[27] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_246_E20_F07/WGS_246_E20_F07.mt.disc.sam.cluster.summary.tsv"
[28] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_249_E20_F02/WGS_249_E20_F02.mt.disc.sam.cluster.summary.tsv"
[29] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_252_E20_F02/WGS_252_E20_F02.mt.disc.sam.cluster.summary.tsv"
[30] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_253_E20_F01/WGS_253_E20_F01.mt.disc.sam.cluster.summary.tsv"
[31] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_262_E20_F07/WGS_262_E20_F07.mt.disc.sam.cluster.summary.tsv"
[32] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_266_E20_F04/WGS_266_E20_F04.mt.disc.sam.cluster.summary.tsv"
[33] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_289_E20_F02/WGS_289_E20_F02.mt.disc.sam.cluster.summary.tsv"
[34] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_299_E20_F09/WGS_299_E20_F09.mt.disc.sam.cluster.summary.tsv"
[35] "/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_WGS_300_E20_F05/WGS_300_E20_F05.mt.disc.sam.cluster.summary.tsv"
#files
#name_files <- gsub("/home/marius/Documents/Projects/prsa/02_Data/STAR/mapping/release102/","",files)
# name_files <- gsub("D:/PhD/Projects/prsa/02_Data/STAR/mapping/release102/","",files)
name_files <- gsub("/data/botos/2022_12_01_NUMTS_Aedes_Aegypti//NUMTS_OUTPUT_","",
gsub(".mt.disc.sam.cluster.summary.tsv","",
gsub("WGS.*F[0-9][0-9]*/","",files)))
names(files) <- name_files
#head(files)
#Read it inside lists.
#lapply(names(files),function(x) head(x))
dfs_numts <- lapply(files,function(fj) read.table(fj,header = FALSE))
paste0("List of ",length(dfs_numts)," samples.\n")
[1] "List of 35 samples.\n"
# Split the column into the position data
dfs_numts_pos <- lapply(dfs_numts,function(df) {
separate(data = df,col = V3,into = c("chr","start","end","MT","mt_start","mt_end"),sep = "_")
})
# add the length of the segment in the chr or mt chromosomes
dfs_numts_pos_match_len <- lapply(dfs_numts_pos,function(df) {
df <- dplyr::mutate(df,
chr_matchLen = as.integer(end) - as.integer(start),
mt_matchLen = as.integer(mt_end) - as.integer(mt_start))
return(df)})
#print
dfs_numts_pos_match_len$WGS_017_E20_F03
# V5 = Cluster sequences that are in the same cluster 500bp apart gap.
# V6 = cluster of read sequences that are max 500bp apart and mininum 2
lapply(dfs_numts_pos_match_len,function(df) {
df[["V2"]][1]
# names(df)
# df %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::filter(chr_matchLen > 5 & mt_matchLen > 5) %>% dplyr::arrange(V6)
})
$WGS_017_E20_F03
[1] "WGS_017_E20_F03"
$WGS_021_E20_F02
[1] "WGS_021_E20_F02"
$WGS_024_E20_F02
[1] "WGS_024_E20_F02"
$WGS_025_E20_F02
[1] "WGS_025_E20_F02"
$WGS_038_E20_F07
[1] "WGS_038_E20_F07"
$WGS_040_E20_F02
[1] "WGS_040_E20_F02"
$WGS_042_E20_F02
[1] "WGS_042_E20_F02"
$WGS_043_E20_F02
[1] "WGS_043_E20_F02"
$WGS_047_E20_F02
[1] "WGS_047_E20_F02"
$WGS_050_E20_F02
[1] "WGS_050_E20_F02"
$WGS_050_E20_F03
[1] "WGS_050_E20_F03"
$WGS_060_E20_F06
[1] "WGS_060_E20_F06"
$WGS_063_E20_F02
[1] "WGS_063_E20_F02"
$WGS_080_E20_F02
[1] "WGS_080_E20_F02"
$WGS_084_E20_F02
[1] "WGS_084_E20_F02"
$WGS_088_E20_F05
[1] "WGS_088_E20_F05"
$WGS_088_E20_F06
[1] "WGS_088_E20_F06"
$WGS_095_E20_F11
[1] "WGS_095_E20_F11"
$WGS_115_E20_F03
[1] "WGS_115_E20_F03"
$WGS_132_E20_F10
[1] "WGS_132_E20_F10"
$WGS_134_E20_F02
[1] "WGS_134_E20_F02"
$WGS_211_E20_F02
[1] "WGS_211_E20_F02"
$WGS_220_E20_F07
[1] "WGS_220_E20_F07"
$WGS_230_E20_F06
[1] "WGS_230_E20_F06"
$WGS_240_E20_F05
[1] "WGS_240_E20_F05"
$WGS_246_E20_F05
[1] "WGS_246_E20_F05"
$WGS_246_E20_F07
[1] "WGS_246_E20_F07"
$WGS_249_E20_F02
[1] "WGS_249_E20_F02"
$WGS_252_E20_F02
[1] "WGS_252_E20_F02"
$WGS_253_E20_F01
[1] "WGS_253_E20_F01"
$WGS_262_E20_F07
[1] "WGS_262_E20_F07"
$WGS_266_E20_F04
[1] "WGS_266_E20_F04"
$WGS_289_E20_F02
[1] "WGS_289_E20_F02"
$WGS_299_E20_F09
[1] "WGS_299_E20_F09"
$WGS_300_E20_F05
[1] "WGS_300_E20_F05"
###Visualize the data in violins
library(ggplot2)
library(dplyr)
# Iterate over the list of dataframes
plots_df_numts <- lapply(dfs_numts_pos_match_len,function(df){
# Create the plot
fig <- ggplot(df %>% dplyr::group_by(V2) %>% dplyr::filter(chr_matchLen > 5 & mt_matchLen > 5), aes(x=chr_matchLen,y="")) +
geom_violin(fill="#FAF0E6", alpha=0.4) +
geom_jitter(aes(color="Nuclear"), size=3, alpha=0.3,show.legend = FALSE) +
#geom_boxplot(width=0.1) +
scale_color_manual(values="#000080") +
# xlim(-30,max(df$chr_matchLen)+30) +
#xlim(0,1100) +
scale_x_continuous(breaks = seq(0,1100,100),limits=c(-30,1100)) +
labs(title=paste0("Nuclear: ",unique(df$V2))) +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.y = element_blank(),
plot.background = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
fig2 <- ggplot( df %>% dplyr::group_by(V2) %>% dplyr::filter(chr_matchLen > 0 & mt_matchLen > 0), aes(x=mt_matchLen,y="")) +
geom_violin(fill="#FAF0E6", alpha=0.4) +
geom_jitter(aes(color="Mitochondrial"), size=3, alpha=0.3,show.legend = FALSE) +
#geom_boxplot(width=0.1) +
scale_color_manual(values="#800020") +
# xlim(-30,max(df$mt_matchLen)+30) +
#xlim(0,12900) +
scale_x_continuous(breaks = seq(0,12900,500),limits=c(-30,12900)) +
labs(title=paste0("Mitochondrial: ",unique(df$V2))) +
theme(axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
axis.title.y = element_blank(),
plot.background = element_rect(fill = "white"),
panel.background = element_rect(fill = "white"),
axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# Print the plot
print(list(fig,fig2))
# Add the plot to the list
#plots <- c(plots, list(fig))
return(plots=list(fig,fig2))
})
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
[[1]]
[[2]]
par(mfrow = c(35, 1))#, mar = rep(0.5, 4))
library(gridExtra)
Attaching package: 'gridExtra'
The following object is masked from 'package:BiocGenerics':
combine
The following object is masked from 'package:dplyr':
combine
lapply(names(plots_df_numts),function(nm) {
grid.arrange(grobs=plots_df_numts[[nm]],ncol=2)})
[[1]]
TableGrob (1 x 2) "arrange": 2 grobs
[[2]]
TableGrob (1 x 2) "arrange": 2 grobs
[[3]]
TableGrob (1 x 2) "arrange": 2 grobs
[[4]]
TableGrob (1 x 2) "arrange": 2 grobs
[[5]]
TableGrob (1 x 2) "arrange": 2 grobs
[[6]]
TableGrob (1 x 2) "arrange": 2 grobs
[[7]]
TableGrob (1 x 2) "arrange": 2 grobs
[[8]]
TableGrob (1 x 2) "arrange": 2 grobs
[[9]]
TableGrob (1 x 2) "arrange": 2 grobs
[[10]]
TableGrob (1 x 2) "arrange": 2 grobs
[[11]]
TableGrob (1 x 2) "arrange": 2 grobs
[[12]]
TableGrob (1 x 2) "arrange": 2 grobs
[[13]]
TableGrob (1 x 2) "arrange": 2 grobs
[[14]]
TableGrob (1 x 2) "arrange": 2 grobs
[[15]]
TableGrob (1 x 2) "arrange": 2 grobs
[[16]]
TableGrob (1 x 2) "arrange": 2 grobs
[[17]]
TableGrob (1 x 2) "arrange": 2 grobs
[[18]]
TableGrob (1 x 2) "arrange": 2 grobs
[[19]]
TableGrob (1 x 2) "arrange": 2 grobs
[[20]]
TableGrob (1 x 2) "arrange": 2 grobs
[[21]]
TableGrob (1 x 2) "arrange": 2 grobs
[[22]]
TableGrob (1 x 2) "arrange": 2 grobs
[[23]]
TableGrob (1 x 2) "arrange": 2 grobs
[[24]]
TableGrob (1 x 2) "arrange": 2 grobs
[[25]]
TableGrob (1 x 2) "arrange": 2 grobs
[[26]]
TableGrob (1 x 2) "arrange": 2 grobs
[[27]]
TableGrob (1 x 2) "arrange": 2 grobs
[[28]]
TableGrob (1 x 2) "arrange": 2 grobs
[[29]]
TableGrob (1 x 2) "arrange": 2 grobs
[[30]]
TableGrob (1 x 2) "arrange": 2 grobs
[[31]]
TableGrob (1 x 2) "arrange": 2 grobs
[[32]]
TableGrob (1 x 2) "arrange": 2 grobs
[[33]]
TableGrob (1 x 2) "arrange": 2 grobs
[[34]]
TableGrob (1 x 2) "arrange": 2 grobs
[[35]]
TableGrob (1 x 2) "arrange": 2 grobs
NA
NA
lapply(dfs_numts_pos_match_len,function(df) {
df %>% dplyr::group_by(V2) %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>% dplyr::summarise(n=n())})
$WGS_017_E20_F03
$WGS_021_E20_F02
$WGS_024_E20_F02
$WGS_025_E20_F02
$WGS_038_E20_F07
$WGS_040_E20_F02
$WGS_042_E20_F02
$WGS_043_E20_F02
$WGS_047_E20_F02
$WGS_050_E20_F02
$WGS_050_E20_F03
$WGS_060_E20_F06
$WGS_063_E20_F02
$WGS_080_E20_F02
$WGS_084_E20_F02
$WGS_088_E20_F05
$WGS_088_E20_F06
$WGS_095_E20_F11
$WGS_115_E20_F03
$WGS_132_E20_F10
$WGS_134_E20_F02
$WGS_211_E20_F02
$WGS_220_E20_F07
$WGS_230_E20_F06
$WGS_240_E20_F05
$WGS_246_E20_F05
$WGS_246_E20_F07
$WGS_249_E20_F02
$WGS_252_E20_F02
$WGS_253_E20_F01
$WGS_262_E20_F07
$WGS_266_E20_F04
$WGS_289_E20_F02
$WGS_299_E20_F09
$WGS_300_E20_F05
NA
###barplot of the number of numts filtered >20 length in the nuclear chr and >20 mt chr
per_sample_numts_df <- do.call(rbind,lapply(dfs_numts_pos_match_len,function(df) {df %>% dplyr::group_by(V2) %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>% dplyr::summarise(n=n())}))
svg("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/number_of_numts_per_sample_larger_20bp.svg",width = 9,height = 5)
per_sample_numts_df %>% ggplot(aes(x = V2,y = n)) +
geom_col() +
geom_text(data = per_sample_numts_df,aes(label=n), vjust= -0.2) +
ylab(label = "Numts Per Sample") +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
dev.off()
null device
1
# counts_of_numts_longer_than_20bp <- lapply(dfs_numts_pos_match_len,function(df) {
# df %>% dplyr::group_by(V2) %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>% dplyr::summarise(n=n())})
#
counts_of_numts_longer_than_20bp <- lapply(dfs_numts_pos_match_len,function(df) df %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>% dplyr::mutate_at(c("mt_start","mt_end","mt_matchLen"),as.numeric) %>% group_by(group = cut(mt_matchLen, breaks = seq(0,17000,50))) %>% summarize(mt_matchLen = n(),sampleID = df[[2]][1]))
counts_of_numts_longer_than_20bp_df <- do.call(rbind,counts_of_numts_longer_than_20bp)
#dir.create("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table",recursive = TRUE)
write.table(counts_of_numts_longer_than_20bp_df,file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/numts_ranges_all_samples.csv",sep = ",")
p <- counts_of_numts_longer_than_20bp_df %>% ggplot(aes(x=as.factor(group), y=mt_matchLen,fill = ("red"))) +
geom_col(alpha=0.7) +
#geom_col(data= dataT[1:39,],mapping = aes(x=date,y=income/2 ,fill=d1)) +
geom_line(data= counts_of_numts_longer_than_20bp_df,group=1,mapping=aes(x=as.factor(group),y=mt_matchLen)) +
# geom_point(data = counts_of_numts_longer_than_20bp_df,
# aes(x = counts_of_numts_longer_than_20bp_df$group[which.max(counts_of_numts_longer_than_20bp_df$mt_matchLen)],
# y = counts_of_numts_longer_than_20bp_df$mt_matchLen[which.max(counts_of_numts_longer_than_20bp_df$mt_matchLen)]), color="black",size=3) +
#geom_area(position = "identity", alpha = 0.5,color="red") +
#geom_bar(stat = "identity") +
#stat_density(aes(geom="line",position="identity")) +
#geom_density(aes(after_stat(count))) +
#xlim(c(0,17000)) +
#coord_flip() +
labs(fill = "Numts Length Group") +
scale_x_discrete("group") +
facet_wrap(~sampleID,ncol = 2) +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
p
svg(filename = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/numts_ranges_all_samples_barplots_per_sample.svg",
width = 16,
height = 31)
p
dev.off()
png
2
###add mt genes
mt_genes_ae <- read.table(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/Visualization_samples/mt_ae_gtf_genes_trna_rrna.txt",sep = "\t")
# unique(mt_genes_ae$V5)
# unique(mt_genes_ae$V6)
# unique(mt_genes_ae$V7)
# unique(mt_genes_ae$V4)
mt_genes_ae$paste <- paste(mt_genes_ae$V5,mt_genes_ae$V6,sep="_")
mt_genes_ae <- mt_genes_ae[,c("V2","V3","paste")]
mt_genes_ae <- mt_genes_ae %>% dplyr::filter(paste != "_") %>% dplyr::mutate(Genes= str_remove(paste,"^_|_$"))
mt_genes_ae <- mt_genes_ae[,c("V2","V3","Genes")]
mt_genes_ae$chr <- "chrM"
mt_genes_ae$value <- 1
mt_genes_ae <- mt_genes_ae[,c("chr","V2","V3","value","Genes")]
colnames(mt_genes_ae) <- c("chr","start","end","value","gene")
mt_genes_ae$start <- mt_genes_ae$start * 100000
mt_genes_ae$end <- mt_genes_ae$end * 100000
mt_genes_ae <- mt_genes_ae %>% dplyr::mutate_at(c("start","end","value"),as.numeric)
# chr_genes_ae <- df1_link_WGS_017_E20_F03
# chr_genes_ae$gene <- "Ecxample.2.1aa.2"
# anno_genes_ae <- rbind(mt_genes_ae,chr_genes_ae)
#
# anno_genes_ae <- anno_genes_ae %>% dplyr::mutate_at(c("start","end","value"),as.numeric)
# run and commennt and re runn the one on top
mt_genes_ae <- read.table(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/Visualization_samples/mt_ae_gtf_genes_trna_rrna.txt",sep = "\t")
mt_genes_2 <- mt_genes_ae %>% dplyr::filter(V1 == "gene") %>% dplyr::select(V2,V3,V6) %>% dplyr::filter(V6 != "") %>% dplyr::mutate(chr="chrM",
value = 1)
mt_genes_2 <- mt_genes_2[,c("chr","V2","V3","value","V6")]
colnames(mt_genes_2) <- c("chr","start","end","value","gene")
mt_genes_2$start <- mt_genes_2$start * 100000
mt_genes_2$end <- mt_genes_2$end * 100000
mt_genes_2 <- mt_genes_2 %>% dplyr::mutate_at(c("start","end","value"),as.numeric)
circos_RIL_plots <- lapply(dfs_numts_pos_match_len,function(df) {
#png(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/",df[["V2"]][1],"_circos_chr_to_mt.png"),width = 680,height = 680)
#svg(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/",df[["V2"]][1],"_circos_chr_to_mt.svg"),width = 10,height = 10)
#run code for circos
df_s <- df %>% dplyr::group_by(chr) %>%
dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>%
dplyr::arrange(V6) %>%
dplyr::select(chr,start,end,chr_matchLen,mt_start,mt_end,mt_matchLen) %>%
dplyr::distinct()
df_s$mt_start <- as.integer(df_s$mt_start)*100000
df_s$mt_end <- as.integer(df_s$mt_end)*100000
chr_df_s <- df_s %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::select(chr,start,end,chr_matchLen) %>% dplyr::mutate(chr=paste0("chr",chr))
colnames(chr_df_s) <- c("chr","start","end","value")
mt_df_s <- df_s %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::select(mt_start,mt_end,mt_matchLen)
mt_df_s$chr <- "chrM"
colnames(mt_df_s) <- c("chr","start","end","value")
chr_df_s$start <- as.integer(chr_df_s$start)
chr_df_s$end <- as.integer(chr_df_s$end)
chr_mt_df_s <- dplyr::bind_rows(chr_df_s,mt_df_s)
#Create the chr and mt regions of the numts by splitting them.
df1_link <- chr_mt_df_s %>% dplyr::filter(chr %in% c("chr1","chr2","chr3"))
df2_link <- chr_mt_df_s %>% dplyr::filter(chr %in% c("chrM"))
# Plot the circos
#circos.par("track.height"=0.8, gap.degree=5, cell.padding=c(0, 0, 0, 0))
circos.clear()
circos.par(gap.degree=5)
ref_fd_ae <- data.frame("Chromosome"=c("chr1","chr2","chr3","chrM"),"ChromStart"=c(0,0,0,0),"Chromend"=c(310827022,474425716,409777670,16790*100000))
#circos.genomicInitialize(ref_fd_ae)
circos.genomicInitialize(ref_fd_ae,plotType = NULL)
circos.genomicLabels(mt_genes_ae ,labels.column = 5, cex=0.7,line_lwd=0.6, line_col="grey20", connection_height = 0.019,
labels_height = 0.2,niceFacing = TRUE,side = "outside")
# circos.genomicLabels(mt_genes_2 ,labels.column = 5, cex=0.5,line_lwd=0.6, line_col="grey20", connection_height = 0.05, labels_height = 0.2,niceFacing = TRUE,side = "outside")
circos.track(ylim=c(0, 1), panel.fun=function(x, y) {
chr=CELL_META$sector.index
xlim=CELL_META$xlim
ylim=CELL_META$ylim
circos.text(mean(xlim), mean(ylim), chr, cex=0.8)}, bg.col = c("#0000FF40","#0000FF40","#0000FF40","#FF000040"), bg.border=F, track.height=0.06)
circos.track(track.index = get.current.track.index(),
panel.fun = function(x, y) {
circos.genomicAxis(h = "bottom", direction = "inside",labels.cex = 0.6,tickLabelsStartFromZero = TRUE)})
set_track_gap(gap = 0.04)
# circos.genomicTrack(chr_mt_df_s %>% dplyr::distinct(),
# track.height=0.1,
# panel.fun = function(region, value, ...) {
# circos.genomicPoints(region, value,
# pch = 6,
# cex = 1.6,
# col="black")})#col=ifelse(value[[1]] > 150,"red","black"))})
circos.genomicTrack(chr_mt_df_s %>% dplyr::distinct(),
track.height=0.2,
panel.fun = function(region,value,...) {
circos.genomicRect(region, value,col = "#FF000040",...)})
#circos.genomicPoints(region, value,pch = 6,cex = 1.6,col="black")})#col=ifelse(value[[1]] > 150,"red","black"))})
#circos.update(sector.index = "chrM",track.index = 4)
#circos.points(x=col="red")
col <- alpha(wes_palette("Zissou1", n = nrow(df2_link), type = "continuous"), 0.4)
circos.genomicLink(df1_link %>% dplyr::mutate_at(c("start","end","value"),as.numeric),
df2_link %>% dplyr::mutate_at(c("start","end","value"),as.numeric),
#use lirbary wesanderson http://www.sthda.com/english/wiki/colors-in-r
col = col)
# col = colorRampPalette(brewer.pal(5, "Dark2"))(nrow(df2_link_WGS_017_E20_F03)))#,border = NA,transparency=0.1)
title(paste0(df[["V2"]][1]))
# dev.off()
})
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
Adding missing grouping variables: `chr`
#v5 is cluster of 500bp gaps
#v6 is cluster of more than 2 reads supporting the previous cluster
lapply(dfs_numts_pos_match_len,function(df) {
df_s <- df %>% dplyr::group_by(chr) %>%
dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>%
dplyr::arrange(V6) %>%
dplyr::select(chr,start,end,chr_matchLen,mt_start,mt_end,mt_matchLen) %>%
dplyr::distinct() %>%
write.table(paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/",df[["V2"]][1],"_chr_to_mt_table.csv"),sep = ",",col.names = TRUE,row.names = FALSE)
})
$WGS_017_E20_F03
NULL
$WGS_021_E20_F02
NULL
$WGS_024_E20_F02
NULL
$WGS_025_E20_F02
NULL
$WGS_038_E20_F07
NULL
$WGS_040_E20_F02
NULL
$WGS_042_E20_F02
NULL
$WGS_043_E20_F02
NULL
$WGS_047_E20_F02
NULL
$WGS_050_E20_F02
NULL
$WGS_050_E20_F03
NULL
$WGS_060_E20_F06
NULL
$WGS_063_E20_F02
NULL
$WGS_080_E20_F02
NULL
$WGS_084_E20_F02
NULL
$WGS_088_E20_F05
NULL
$WGS_088_E20_F06
NULL
$WGS_095_E20_F11
NULL
$WGS_115_E20_F03
NULL
$WGS_132_E20_F10
NULL
$WGS_134_E20_F02
NULL
$WGS_211_E20_F02
NULL
$WGS_220_E20_F07
NULL
$WGS_230_E20_F06
NULL
$WGS_240_E20_F05
NULL
$WGS_246_E20_F05
NULL
$WGS_246_E20_F07
NULL
$WGS_249_E20_F02
NULL
$WGS_252_E20_F02
NULL
$WGS_253_E20_F01
NULL
$WGS_262_E20_F07
NULL
$WGS_266_E20_F04
NULL
$WGS_289_E20_F02
NULL
$WGS_299_E20_F09
NULL
$WGS_300_E20_F05
NULL
# library(GenomicRanges)
# sapply(c("IRanges", "AnnotationDbi", "GenomicAlignments", "plyranges", "restfulr", "GenomicRanges", "Biostrings", "SummarizedExperiment", "BiocIO", "XVector", "Rsamtools", "rtracklayer", "DelayedArray", "GenomeInfoDb"), unloadNamespace)
library(plyranges)
mt_genes_2_ranges <- GenomicRanges::GRanges(seqnames = "NC_035159.1",ranges = IRanges::IRanges(start = mt_genes_2$start/100000,end = mt_genes_2$end/100000),genes=mt_genes_2$gene, y0=0,y1=0.13)
#mt_genes_2_ranges <- GRanges(seqnames = mt_genes_2$chr,ranges = IRanges(start = mt_genes_2$start/100000,end = mt_genes_2$end/100000),genes=mt_genes_2$gene, y0=0,y1=0.13)
#library(S4Vectors)
# library(GenomicRanges)
# library(IRanges)
#library(karyoploteR)
list_of_regions <- lapply(dfs_numts_pos_match_len,function(df){
cat(paste0("start with sample: ",df[["V2"]][1],"\n"))
ranges_df <- GenomicRanges::GRanges(seqnames = df %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>% dplyr::filter(chr %in% c(1,2,3)) %>% dplyr::pull(chr),
ranges =IRanges::IRanges(start = as.numeric(df %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>%
dplyr::filter(chr %in% c(1,2,3)) %>%
dplyr::pull(mt_start)),
end = as.numeric(df %>% dplyr::filter(chr_matchLen > 20 & mt_matchLen > 20) %>%
dplyr::filter(chr %in% c(1,2,3)) %>%
dplyr::pull(mt_end))),
real_chr="chrM")
ranges_df <- as.data.frame(ranges_df)
ranges_df$seqnames2 <- ranges_df$seqnames
#Set the name of the chrm to the official ncbi nane that i use to create under the genes for overlapping.
#ranges_df$seqnames <- "chrM"
ranges_df$seqnames <- "NC_035159.1"
ranges_df <- dplyr::mutate(ranges_df,
chr_start_end=paste0(seqnames,":",start,"-",end))
cat("extracting regions\n")
empty_gr <- GenomicRanges::GRanges(seqnames = character(), IRanges::IRanges(start = integer(), end = integer()))
regs1 <- if(nrow(ranges_df %>% dplyr::filter(seqnames2 == 1)) != 0){
toGRanges(ranges_df %>% filter(seqnames2 == 1) %>% dplyr::pull(chr_start_end))
}else{
empty_gr}
regs2 <- if(nrow(ranges_df %>% dplyr::filter(seqnames2 == 2)) != 0){
toGRanges(ranges_df %>% filter(seqnames2 == 2) %>% dplyr::pull(chr_start_end))
}else{
empty_gr}
regs3 <- if(nrow(ranges_df %>% dplyr::filter(seqnames2 == 3)) != 0){
toGRanges(ranges_df %>% filter(seqnames2 == 3) %>% dplyr::pull(chr_start_end))
}else{
empty_gr}
regs1_df <- as.data.frame(regs1) %>% dplyr::mutate(chrom="chr1")
regs2_df <- as.data.frame(regs2) %>% dplyr::mutate(chrom="chr2")
regs3_df <- as.data.frame(regs3) %>% dplyr::mutate(chrom="chr3")
regs_df <- rbind(regs1_df,regs2_df,regs3_df)
cat("done\n")
return(regs_df)})
start with sample: WGS_017_E20_F03
extracting regions
done
start with sample: WGS_021_E20_F02
extracting regions
done
start with sample: WGS_024_E20_F02
extracting regions
done
start with sample: WGS_025_E20_F02
extracting regions
done
start with sample: WGS_038_E20_F07
extracting regions
done
start with sample: WGS_040_E20_F02
extracting regions
done
start with sample: WGS_042_E20_F02
extracting regions
done
start with sample: WGS_043_E20_F02
extracting regions
done
start with sample: WGS_047_E20_F02
extracting regions
done
start with sample: WGS_050_E20_F02
extracting regions
done
start with sample: WGS_050_E20_F03
extracting regions
done
start with sample: WGS_060_E20_F06
extracting regions
done
start with sample: WGS_063_E20_F02
extracting regions
done
start with sample: WGS_080_E20_F02
extracting regions
done
start with sample: WGS_084_E20_F02
extracting regions
done
start with sample: WGS_088_E20_F05
extracting regions
done
start with sample: WGS_088_E20_F06
extracting regions
done
start with sample: WGS_095_E20_F11
extracting regions
done
start with sample: WGS_115_E20_F03
extracting regions
done
start with sample: WGS_132_E20_F10
extracting regions
done
start with sample: WGS_134_E20_F02
extracting regions
done
start with sample: WGS_211_E20_F02
extracting regions
done
start with sample: WGS_220_E20_F07
extracting regions
done
start with sample: WGS_230_E20_F06
extracting regions
done
start with sample: WGS_240_E20_F05
extracting regions
done
start with sample: WGS_246_E20_F05
extracting regions
done
start with sample: WGS_246_E20_F07
extracting regions
done
start with sample: WGS_249_E20_F02
extracting regions
done
start with sample: WGS_252_E20_F02
extracting regions
done
start with sample: WGS_253_E20_F01
extracting regions
done
start with sample: WGS_262_E20_F07
extracting regions
done
start with sample: WGS_266_E20_F04
extracting regions
done
start with sample: WGS_289_E20_F02
extracting regions
done
start with sample: WGS_299_E20_F09
extracting regions
done
start with sample: WGS_300_E20_F05
extracting regions
done
library(S4Vectors)
lapply(names(list_of_regions),function(kt){
#svg(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/karytypePlot_",kt,"numts_chr.svg"),width = 12,height = 4)
custom.genome <- toGRanges(data.frame(chr=c("NC_035159.1"),start=c(0),end=c(16790)))
kp <- plotKaryotype(genome = custom.genome)
kpDataBackground(kp, r0 = 0,r1 = 0.25)
kpDataBackground(kp, r0 = 0.25,r1 = 0.5,col="#FF000040")
kpDataBackground(kp, r0 = 0.5,r1 = 0.75,col="#FF000040")
kpDataBackground(kp, r0 = 0.75,r1 = 1,col="#FF000040")
kpRect(kp,mt_genes_2_ranges,col="red")
kpText(karyoplot = kp,data = mt_genes_2_ranges,labels = mt_genes_2_ranges$genes,y = 0.17,cex=0.6,col="red")
#add mt sequences from each chromoosome
#add numts from chr 1
kpRect(kp,data = list_of_regions[[kt]] %>% filter(chrom == "chr1") %>% toGRanges, y0 = 0.3,y1=0.4)
#add numts from chr 2
kpRect(kp,data = list_of_regions[[kt]] %>% filter(chrom == "chr2") %>% toGRanges, y0 = 0.6,y1 = 0.7)
#add numts from chr 3
kpRect(kp,data = list_of_regions[[kt]] %>% filter(chrom == "chr3") %>% toGRanges, y0 = 0.8,y1 = 0.9)
title(paste0(kt))
#dev.off()
})
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
[[8]]
NULL
[[9]]
NULL
[[10]]
NULL
[[11]]
NULL
[[12]]
NULL
[[13]]
NULL
[[14]]
NULL
[[15]]
NULL
[[16]]
NULL
[[17]]
NULL
[[18]]
NULL
[[19]]
NULL
[[20]]
NULL
[[21]]
NULL
[[22]]
NULL
[[23]]
NULL
[[24]]
NULL
[[25]]
NULL
[[26]]
NULL
[[27]]
NULL
[[28]]
NULL
[[29]]
NULL
[[30]]
NULL
[[31]]
NULL
[[32]]
NULL
[[33]]
NULL
[[34]]
NULL
[[35]]
NULL
list_of_regions_ID <- lapply(names(list_of_regions),function(name) {
df <- list_of_regions[[name]]
df$ID <- name
return(df)
})
svg(filename = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/Postions_in_CHR_of_numts.svg",width = 10,height = 32)
combined_list_of_regions_ID <- do.call(rbind,list_of_regions_ID)
# combined_list_of_regions_ID <- dplyr::mutate(combined_list_of_regions_ID,pos_chr_start_end = paste0(chrom,":",start,"-",end))
combined_list_of_regions_ID <- dplyr::mutate(combined_list_of_regions_ID,pos_chr_start_end = paste0(seqnames,":",start,"-",end))
table(combined_list_of_regions_ID$pos_chr_start_end) %>% subset(combined_list_of_regions_ID$pos_chr_start_end > 1)
NC_035159.1:1-3230 NC_035159.1:1-35 NC_035159.1:1-40 NC_035159.1:1-47 NC_035159.1:1-55 NC_035159.1:10-131 NC_035159.1:10054-12188
1 1 1 1 2 1 1
NC_035159.1:10079-10409 NC_035159.1:10086-10431 NC_035159.1:10098-10376 NC_035159.1:10141-10227 NC_035159.1:10141-10238 NC_035159.1:10141-10283 NC_035159.1:10145-10367
1 1 1 1 1 2 1
NC_035159.1:10153-10231 NC_035159.1:10154-10294 NC_035159.1:10170-11055 NC_035159.1:10197-11104 NC_035159.1:10217-10266 NC_035159.1:10218-10265 NC_035159.1:10228-10249
1 1 1 1 1 1 1
NC_035159.1:10229-10256 NC_035159.1:10229-10266 NC_035159.1:10230-10283 NC_035159.1:10231-10489 NC_035159.1:10233-10265 NC_035159.1:10239-10797 NC_035159.1:10260-10501
1 1 1 1 1 1 1
NC_035159.1:10388-10623 NC_035159.1:10475-11219 NC_035159.1:10516-10819 NC_035159.1:10549-10755 NC_035159.1:10590-10692 NC_035159.1:10590-11087 NC_035159.1:10617-10681
1 1 1 1 1 1 1
NC_035159.1:10634-10689 NC_035159.1:10646-10692 NC_035159.1:10691-11817 NC_035159.1:10991-11023 NC_035159.1:10991-11325 NC_035159.1:11-49 NC_035159.1:11000-11042
1 1 1 1 1 1 1
NC_035159.1:11030-11078 NC_035159.1:11030-11102 NC_035159.1:11030-11108 NC_035159.1:11030-11140 NC_035159.1:11030-11141 NC_035159.1:11037-11117 NC_035159.1:11039-11075
1 1 1 1 1 1 1
NC_035159.1:11047-11573 NC_035159.1:11052-11581 NC_035159.1:11058-11103 NC_035159.1:11060-11141 NC_035159.1:11072-11105 NC_035159.1:11074-11135 NC_035159.1:11078-11107
1 1 1 1 1 1 1
NC_035159.1:11079-11121 NC_035159.1:11089-11125 NC_035159.1:11102-11580 NC_035159.1:11103-11570 NC_035159.1:11104-11126 NC_035159.1:11104-11128 NC_035159.1:11104-11131
1 1 1 1 1 1 1
NC_035159.1:11104-11133 NC_035159.1:11104-11140 NC_035159.1:11104-11141 NC_035159.1:11104-11156 NC_035159.1:11104-11161 NC_035159.1:11104-11351 NC_035159.1:11105-11136
1 1 3 1 1 1 1
NC_035159.1:11105-11158 NC_035159.1:11110-11729 NC_035159.1:11112-11564 NC_035159.1:11133-12521 NC_035159.1:11144-11564 NC_035159.1:11153-11727 NC_035159.1:11197-11325
1 1 1 1 1 1 1
NC_035159.1:11198-11323 NC_035159.1:112-656 NC_035159.1:11201-11335 NC_035159.1:11201-11342 NC_035159.1:11204-11374 NC_035159.1:11205-11358 NC_035159.1:11205-11374
1 1 1 1 1 1 1
NC_035159.1:11212-11358 NC_035159.1:11215-11296 NC_035159.1:11215-11299 NC_035159.1:11227-11318 NC_035159.1:11228-12076 NC_035159.1:11239-11289 NC_035159.1:11244-11272
1 1 1 1 1 1 1
NC_035159.1:11276-11347 NC_035159.1:11281-11363 NC_035159.1:11326-11578 NC_035159.1:11328-11579 NC_035159.1:11331-11580 NC_035159.1:11337-11580 NC_035159.1:11341-11564
1 1 1 1 1 1 1
NC_035159.1:11341-11575 NC_035159.1:11350-11559 NC_035159.1:11374-11399 NC_035159.1:11374-11420 NC_035159.1:11375-11721 NC_035159.1:11459-12105 NC_035159.1:11476-13387
1 1 1 1 1 1 1
NC_035159.1:11504-11589 NC_035159.1:11507-11575 NC_035159.1:11508-11581 NC_035159.1:11514-11573 NC_035159.1:11515-11580 NC_035159.1:11517-11559 NC_035159.1:11518-11581
1 1 1 1 1 1 1
NC_035159.1:11521-11559 NC_035159.1:11551-11578 NC_035159.1:11552-11578 NC_035159.1:11556-11747 NC_035159.1:11573-12222 NC_035159.1:11578-11687 NC_035159.1:11578-11712
1 1 1 1 1 1 1
NC_035159.1:11621-12098 NC_035159.1:11632-11699 NC_035159.1:11632-11714 NC_035159.1:11632-11734 NC_035159.1:11634-11706 NC_035159.1:11639-11736 NC_035159.1:11643-11679
1 1 1 1 1 1 1
NC_035159.1:11644-11735 NC_035159.1:11651-11718 NC_035159.1:11672-13489 NC_035159.1:11681-11724 NC_035159.1:11684-11710 NC_035159.1:11684-11721 NC_035159.1:11684-11725
1 1 1 1 1 1 1
NC_035159.1:11684-11727 NC_035159.1:11684-11731 NC_035159.1:11685-11711 NC_035159.1:11686-11730 NC_035159.1:11686-11735 NC_035159.1:11687-11727 NC_035159.1:11687-11735
1 1 1 1 1 4 1
NC_035159.1:11689-11724 NC_035159.1:11689-12171 NC_035159.1:11697-11728 NC_035159.1:11698-11734 NC_035159.1:11735-11925 NC_035159.1:11747-11901 NC_035159.1:11791-12211
1 1 1 1 1 1 1
NC_035159.1:11911-11981 NC_035159.1:12190-12242 NC_035159.1:12207-12465 NC_035159.1:12304-12356 NC_035159.1:12330-12355 NC_035159.1:12409-12635 NC_035159.1:12423-12516
1 1 1 1 1 1 1
NC_035159.1:12424-12471 NC_035159.1:12425-12911 NC_035159.1:12443-12623 NC_035159.1:12450-12601 NC_035159.1:12456-12623 NC_035159.1:12457-12537 NC_035159.1:12460-12599
1 1 1 1 1 1 1
NC_035159.1:12464-12492 NC_035159.1:12494-12617 NC_035159.1:12500-12616 NC_035159.1:12514-12623 NC_035159.1:12519-12644 NC_035159.1:12547-12919 NC_035159.1:12548-12690
1 1 1 1 1 1 1
NC_035159.1:12615-13043 NC_035159.1:12622-12737 NC_035159.1:1270-1297 NC_035159.1:1270-1303 NC_035159.1:1270-1304 NC_035159.1:1270-1307 NC_035159.1:12760-13242
1 1 1 1 2 1 1
NC_035159.1:12903-14067 NC_035159.1:12928-13670 NC_035159.1:12930-13042 NC_035159.1:12932-13148 NC_035159.1:12933-13004 NC_035159.1:12934-13169 NC_035159.1:12936-13175
1 1 1 1 1 1 1
NC_035159.1:12944-13130 NC_035159.1:12947-13113 NC_035159.1:12948-13034 NC_035159.1:12951-13025 NC_035159.1:12951-13032 NC_035159.1:12951-13277 NC_035159.1:12954-13064
1 1 1 1 1 1 1
NC_035159.1:12960-13155 NC_035159.1:12961-13158 NC_035159.1:12965-13132 NC_035159.1:12971-13172 NC_035159.1:12980-13097 NC_035159.1:12981-13175 NC_035159.1:12989-13140
1 1 1 1 1 1 1
NC_035159.1:13002-13129 NC_035159.1:13015-13153 NC_035159.1:13049-13094 NC_035159.1:13057-13078 NC_035159.1:13058-13336 NC_035159.1:13096-13164 NC_035159.1:13096-13171
1 1 1 1 1 1 1
NC_035159.1:13114-13357 NC_035159.1:13163-13667 NC_035159.1:13190-13217 NC_035159.1:13192-13213 NC_035159.1:13192-13216 NC_035159.1:13192-13217 NC_035159.1:13192-13221
1 1 1 1 1 1 1
NC_035159.1:13210-13287 NC_035159.1:13212-13494 NC_035159.1:13273-13717 NC_035159.1:13280-13407 NC_035159.1:1334-1407 NC_035159.1:13365-13629 NC_035159.1:13373-13589
1 1 1 1 1 1 1
NC_035159.1:13374-13560 NC_035159.1:13382-13668 NC_035159.1:13426-13728 NC_035159.1:13439-13687 NC_035159.1:13474-13503 NC_035159.1:13516-14094 NC_035159.1:13546-13682
1 1 1 1 1 1 1
NC_035159.1:13546-13723 NC_035159.1:13548-13656 NC_035159.1:13565-13666 NC_035159.1:1358-1379 NC_035159.1:136-2639 NC_035159.1:13643-13893 NC_035159.1:13691-13862
1 1 1 1 1 1 1
NC_035159.1:13737-13894 NC_035159.1:13795-13833 NC_035159.1:13795-13878 NC_035159.1:1381-1792 NC_035159.1:13873-14224 NC_035159.1:13883-13926 NC_035159.1:13907-14140
1 1 1 1 1 1 1
NC_035159.1:13923-14078 NC_035159.1:13951-13989 NC_035159.1:13990-14019 NC_035159.1:14077-14302 NC_035159.1:14273-14297 NC_035159.1:14273-14298 NC_035159.1:14281-14661
1 1 1 1 1 1 1
NC_035159.1:14297-16598 NC_035159.1:14318-15374 NC_035159.1:14660-14687 NC_035159.1:14666-15629 NC_035159.1:14680-14822 NC_035159.1:14699-14750 NC_035159.1:14700-14822
1 1 1 1 1 1 1
NC_035159.1:14707-15646 NC_035159.1:14716-16386 NC_035159.1:14742-15778 NC_035159.1:14746-14829 NC_035159.1:14747-15165 NC_035159.1:14765-14793 NC_035159.1:14769-14805
1 1 1 1 1 1 1
NC_035159.1:1477-2038 NC_035159.1:14775-15116 NC_035159.1:14791-14827 NC_035159.1:14791-15774 NC_035159.1:14792-14838 NC_035159.1:14800-14824 NC_035159.1:14803-15152
1 1 1 1 1 1 1
NC_035159.1:14805-14956 NC_035159.1:1481-1806 NC_035159.1:14818-16236 NC_035159.1:14823-16434 NC_035159.1:14855-15351 NC_035159.1:14869-16153 NC_035159.1:14875-16444
1 1 1 1 1 1 1
NC_035159.1:14877-15621 NC_035159.1:14881-15157 NC_035159.1:14887-15649 NC_035159.1:14898-15015 NC_035159.1:14904-15088 NC_035159.1:14904-15381 NC_035159.1:14930-15200
1 1 1 1 1 1 1
NC_035159.1:15044-16482 NC_035159.1:15080-15361 NC_035159.1:15081-15139 NC_035159.1:15087-15272 NC_035159.1:15113-15356 NC_035159.1:15114-16276 NC_035159.1:15129-15683
1 1 1 1 1 1 1
NC_035159.1:15139-15353 NC_035159.1:15141-16392 NC_035159.1:15157-15361 NC_035159.1:15170-15359 NC_035159.1:15196-15354 NC_035159.1:15241-15678 NC_035159.1:15266-15629
1 1 1 1 1 1 1
NC_035159.1:15363-15664 NC_035159.1:15401-15629 NC_035159.1:15427-15629 NC_035159.1:15434-15629 NC_035159.1:15448-15609 NC_035159.1:15448-15629 NC_035159.1:15453-15634
1 1 1 1 1 19 1
NC_035159.1:1550-2394 NC_035159.1:15570-15748 NC_035159.1:15682-16479 NC_035159.1:15689-15815 NC_035159.1:15757-15812 NC_035159.1:15764-16455 NC_035159.1:15766-15787
1 1 1 1 1 1 1
NC_035159.1:15775-15806 NC_035159.1:15776-15837 NC_035159.1:15800-16141 NC_035159.1:15825-15968 NC_035159.1:15828-15919 NC_035159.1:15869-15971 NC_035159.1:15918-16330
1 1 1 1 1 1 1
NC_035159.1:15935-16013 NC_035159.1:15936-15984 NC_035159.1:15939-15968 NC_035159.1:15939-16624 NC_035159.1:15941-16029 NC_035159.1:15942-16091 NC_035159.1:15970-16016
1 1 1 1 1 1 1
NC_035159.1:15971-16067 NC_035159.1:15974-16091 NC_035159.1:15975-16091 NC_035159.1:15979-16379 NC_035159.1:16014-16320 NC_035159.1:16091-16367 NC_035159.1:16091-16458
1 1 1 1 1 1 1
NC_035159.1:16138-16301 NC_035159.1:16162-16377 NC_035159.1:16167-16518 NC_035159.1:16175-16400 NC_035159.1:16182-16270 NC_035159.1:16185-16400 NC_035159.1:16191-16401
1 1 1 1 1 1 1
NC_035159.1:16202-16400 NC_035159.1:16206-16444 NC_035159.1:16207-16464 NC_035159.1:16259-16449 NC_035159.1:16260-16449 NC_035159.1:16265-16598 NC_035159.1:16275-16449
1 1 1 1 1 1 1
NC_035159.1:16276-16457 NC_035159.1:16278-16450 NC_035159.1:16280-16449 NC_035159.1:16281-16449 NC_035159.1:16288-16449 NC_035159.1:16294-16477 NC_035159.1:16296-16449
1 1 1 1 1 1 1
NC_035159.1:16297-16459 NC_035159.1:16299-16449 NC_035159.1:16300-16454 NC_035159.1:16301-16348 NC_035159.1:16301-16365 NC_035159.1:16301-16449 NC_035159.1:16303-16361
1 1 1 1 1 1 1
NC_035159.1:16304-16338 NC_035159.1:16306-16360 NC_035159.1:16308-16426 NC_035159.1:16309-16373 NC_035159.1:16313-16357 NC_035159.1:16318-16356 NC_035159.1:1632-2188
1 1 1 1 1 1 1
NC_035159.1:16322-16347 NC_035159.1:16322-16349 NC_035159.1:16322-16355 NC_035159.1:16327-16402 NC_035159.1:16330-16393 NC_035159.1:16331-16381 NC_035159.1:16340-16547
1 1 1 1 1 1 1
NC_035159.1:16347-16387 NC_035159.1:16351-16560 NC_035159.1:16352-16409 NC_035159.1:16354-16393 NC_035159.1:16354-16399 NC_035159.1:16354-16486 NC_035159.1:16355-16408
1 1 1 1 1 1 1
NC_035159.1:16355-16411 NC_035159.1:16355-16429 NC_035159.1:16355-16449 NC_035159.1:16355-16451 NC_035159.1:16355-16471 NC_035159.1:16355-16476 NC_035159.1:16356-16439
1 1 1 1 1 1 1
NC_035159.1:16357-16391 NC_035159.1:16366-16389 NC_035159.1:16366-16398 NC_035159.1:16372-16399 NC_035159.1:16379-16657 NC_035159.1:16380-16583 NC_035159.1:16391-16638
1 1 1 1 1 1 1
NC_035159.1:16424-16614 NC_035159.1:16456-16575 NC_035159.1:16489-16678 NC_035159.1:16493-16602 NC_035159.1:16494-16652 NC_035159.1:16505-16642 NC_035159.1:17-51
1 1 1 1 1 1 1
NC_035159.1:170-750 NC_035159.1:1744-2450 NC_035159.1:1795-2651 NC_035159.1:1822-2191 NC_035159.1:1906-1952 NC_035159.1:1909-1935 NC_035159.1:1909-1937
1 1 1 1 1 1 1
NC_035159.1:1909-1940 NC_035159.1:1909-1956 NC_035159.1:1955-2386 NC_035159.1:1973-1996 NC_035159.1:2-226 NC_035159.1:2-32 NC_035159.1:216-437
1 1 1 1 1 1 1
NC_035159.1:22-1188 NC_035159.1:234-660 NC_035159.1:2390-2444 NC_035159.1:2505-2527 NC_035159.1:2802-2841 NC_035159.1:2802-2968 NC_035159.1:2803-2827
1 1 1 1 1 1 1
NC_035159.1:2803-2838 NC_035159.1:2803-2845 NC_035159.1:2803-2855 NC_035159.1:2809-2838 NC_035159.1:2810-2904 NC_035159.1:2899-2940 NC_035159.1:2899-2957
1 1 1 1 1 1 1
NC_035159.1:2899-2983 NC_035159.1:29-641 NC_035159.1:2904-2965 NC_035159.1:2904-3015 NC_035159.1:2904-3289 NC_035159.1:2904-3559 NC_035159.1:2904-3586
1 1 2 1 1 1 1
NC_035159.1:2904-3610 NC_035159.1:2904-3617 NC_035159.1:2904-3622 NC_035159.1:2904-3629 NC_035159.1:2904-3633 NC_035159.1:2921-3048 NC_035159.1:2934-3631
1 2 1 2 1 1 1
NC_035159.1:2936-5088 NC_035159.1:2941-3078 NC_035159.1:2941-3594 NC_035159.1:2944-3576 NC_035159.1:2965-3594 NC_035159.1:2975-3599 NC_035159.1:2995-3493
1 1 1 1 1 1 1
NC_035159.1:2997-3575 NC_035159.1:3018-4078 NC_035159.1:31-1693 NC_035159.1:3145-3196 NC_035159.1:3145-3212 NC_035159.1:3178-3478 NC_035159.1:3180-3327
1 1 1 1 1 1 1
NC_035159.1:319-367 NC_035159.1:321-366 NC_035159.1:3261-3404 NC_035159.1:3339-4231 NC_035159.1:3355-3401 NC_035159.1:34-1796 NC_035159.1:3407-3455
1 1 1 1 2 1 1
NC_035159.1:3411-3495 NC_035159.1:3418-4323 NC_035159.1:3420-4164 NC_035159.1:3460-3855 NC_035159.1:3502-3687 NC_035159.1:3515-4140 NC_035159.1:3525-3712
1 1 1 1 1 1 1
NC_035159.1:3545-3692 NC_035159.1:3558-3664 NC_035159.1:3561-3617 NC_035159.1:3594-3618 NC_035159.1:3653-3899 NC_035159.1:3694-3721 NC_035159.1:37-275
1 1 1 1 1 1 1
NC_035159.1:37-637 NC_035159.1:3739-3882 NC_035159.1:3781-13902 NC_035159.1:38-321 NC_035159.1:3824-3848 NC_035159.1:3976-3998 NC_035159.1:4019-4296
1 1 1 1 1 1 1
NC_035159.1:4071-4135 NC_035159.1:4081-5629 NC_035159.1:41-83 NC_035159.1:4115-4138 NC_035159.1:4118-4156 NC_035159.1:4119-5427 NC_035159.1:43-2423
1 1 1 1 1 1 1
NC_035159.1:4338-4975 NC_035159.1:4511-4768 NC_035159.1:4629-5040 NC_035159.1:4644-5557 NC_035159.1:4648-5126 NC_035159.1:4725-14004 NC_035159.1:4752-5950
1 1 1 1 1 1 1
NC_035159.1:4782-5606 NC_035159.1:479-2602 NC_035159.1:4809-5971 NC_035159.1:4810-5104 NC_035159.1:5004-5334 NC_035159.1:5014-5312 NC_035159.1:5014-5352
1 1 1 1 1 1 1
NC_035159.1:522-1968 NC_035159.1:53-135 NC_035159.1:5304-5725 NC_035159.1:5309-5464 NC_035159.1:5326-5535 NC_035159.1:5347-5512 NC_035159.1:5358-5390
1 1 1 1 1 1 1
NC_035159.1:5360-5583 NC_035159.1:5367-5589 NC_035159.1:5369-5591 NC_035159.1:5420-5524 NC_035159.1:5423-5660 NC_035159.1:5425-5501 NC_035159.1:5434-5519
1 1 1 1 1 1 1
NC_035159.1:5434-5571 NC_035159.1:5442-5541 NC_035159.1:545-595 NC_035159.1:545-609 NC_035159.1:5452-5533 NC_035159.1:5459-5558 NC_035159.1:5475-5515
1 1 1 1 1 1 1
NC_035159.1:5484-5583 NC_035159.1:5487-5585 NC_035159.1:5490-5535 NC_035159.1:55-1948 NC_035159.1:556-582 NC_035159.1:556-583 NC_035159.1:556-595
1 1 1 1 2 1 1
NC_035159.1:5592-5672 NC_035159.1:5627-5891 NC_035159.1:5709-5734 NC_035159.1:5712-5754 NC_035159.1:5712-5765 NC_035159.1:5978-6153 NC_035159.1:5979-6011
1 1 1 1 1 1 1
NC_035159.1:6-129 NC_035159.1:6005-6166 NC_035159.1:6020-13541 NC_035159.1:6044-6179 NC_035159.1:6060-9643 NC_035159.1:608-1221 NC_035159.1:6117-8288
1 1 1 1 1 1 1
NC_035159.1:6118-6234 NC_035159.1:6127-6170 NC_035159.1:6176-6207 NC_035159.1:6176-6208 NC_035159.1:6213-6754 NC_035159.1:6240-6704 NC_035159.1:6279-6446
1 1 1 1 1 1 1
NC_035159.1:6279-6448 NC_035159.1:6279-6456 NC_035159.1:6279-6475 NC_035159.1:6317-6775 NC_035159.1:6318-8935 NC_035159.1:6419-9423 NC_035159.1:6442-6700
1 1 1 1 1 1 1
NC_035159.1:6443-6671 NC_035159.1:6456-6712 NC_035159.1:6525-7075 NC_035159.1:6539-7178 NC_035159.1:6552-6715 NC_035159.1:6573-6741 NC_035159.1:6621-7011
1 1 1 1 1 1 1
NC_035159.1:6622-6824 NC_035159.1:6645-7097 NC_035159.1:6657-8534 NC_035159.1:6658-6813 NC_035159.1:6659-6709 NC_035159.1:6663-6727 NC_035159.1:6664-6696
1 1 1 1 1 1 1
NC_035159.1:6664-6729 NC_035159.1:6664-6733 NC_035159.1:6665-6710 NC_035159.1:6665-6964 NC_035159.1:6666-6732 NC_035159.1:6666-6964 NC_035159.1:6667-6688
1 1 1 2 1 1 1
NC_035159.1:6667-6701 NC_035159.1:6668-7498 NC_035159.1:6669-6723 NC_035159.1:6670-6721 NC_035159.1:6677-6703 NC_035159.1:6696-6723 NC_035159.1:6704-6742
1 1 1 1 1 1 1
NC_035159.1:6711-6767 NC_035159.1:6721-7098 NC_035159.1:6730-6826 NC_035159.1:6737-8610 NC_035159.1:680-1777 NC_035159.1:6812-7416 NC_035159.1:6844-7239
1 1 1 1 1 1 1
NC_035159.1:6888-7257 NC_035159.1:7041-7151 NC_035159.1:706-1187 NC_035159.1:7073-7142 NC_035159.1:7077-7168 NC_035159.1:708-956 NC_035159.1:709-881
1 1 1 1 1 1 1
NC_035159.1:717-842 NC_035159.1:719-861 NC_035159.1:723-782 NC_035159.1:725-870 NC_035159.1:7289-7845 NC_035159.1:7291-8783 NC_035159.1:7292-7582
1 1 1 1 1 1 1
NC_035159.1:7335-7400 NC_035159.1:7335-7498 NC_035159.1:7335-7529 NC_035159.1:7335-7538 NC_035159.1:7335-7578 NC_035159.1:7335-7617 NC_035159.1:7335-7669
1 1 1 1 1 1 1
NC_035159.1:7364-7605 NC_035159.1:7366-7544 NC_035159.1:7369-7625 NC_035159.1:7370-7487 NC_035159.1:7370-7621 NC_035159.1:7372-7642 NC_035159.1:7373-7481
1 1 1 1 1 1 1
NC_035159.1:7373-7551 NC_035159.1:7375-7659 NC_035159.1:7379-7495 NC_035159.1:738-824 NC_035159.1:7381-7484 NC_035159.1:7381-7565 NC_035159.1:7383-7405
1 1 1 1 1 1 1
NC_035159.1:7383-7659 NC_035159.1:7384-7624 NC_035159.1:7386-7426 NC_035159.1:7386-7680 NC_035159.1:7401-7587 NC_035159.1:7434-8969 NC_035159.1:7440-7652
1 1 1 1 1 1 1
NC_035159.1:7444-7472 NC_035159.1:7447-7713 NC_035159.1:7458-7639 NC_035159.1:7493-8264 NC_035159.1:75-2582 NC_035159.1:7509-7839 NC_035159.1:7520-7930
1 1 1 1 1 1 1
NC_035159.1:7533-7624 NC_035159.1:7540-7590 NC_035159.1:7566-7860 NC_035159.1:7607-7953 NC_035159.1:7612-7636 NC_035159.1:7729-8212 NC_035159.1:7753-7937
1 1 1 1 1 1 1
NC_035159.1:7757-12015 NC_035159.1:784-829 NC_035159.1:784-835 NC_035159.1:784-838 NC_035159.1:784-860 NC_035159.1:7840-7866 NC_035159.1:7840-7882
1 1 1 1 1 1 1
NC_035159.1:7840-7909 NC_035159.1:7840-7988 NC_035159.1:7860-8158 NC_035159.1:7915-9001 NC_035159.1:796-850 NC_035159.1:80-119 NC_035159.1:80-135
1 1 1 1 1 1 1
NC_035159.1:80-141 NC_035159.1:80-150 NC_035159.1:80-205 NC_035159.1:80-243 NC_035159.1:8000-8231 NC_035159.1:8000-8240 NC_035159.1:805-838
1 1 1 1 1 1 1
NC_035159.1:8062-8776 NC_035159.1:8064-8873 NC_035159.1:808-830 NC_035159.1:808-832 NC_035159.1:808-836 NC_035159.1:808-838 NC_035159.1:808-841
1 1 1 1 2 1 1
NC_035159.1:808-846 NC_035159.1:808-849 NC_035159.1:8133-8249 NC_035159.1:8135-8226 NC_035159.1:8138-8276 NC_035159.1:8144-8212 NC_035159.1:8146-8171
1 1 1 1 1 1 1
NC_035159.1:8168-8306 NC_035159.1:8181-8276 NC_035159.1:8183-8224 NC_035159.1:8184-8224 NC_035159.1:8209-8330 NC_035159.1:8220-8258 NC_035159.1:823-2546
1 1 1 1 1 1 1
NC_035159.1:8300-8322 NC_035159.1:8439-8816 NC_035159.1:8553-8788 NC_035159.1:8603-8764 NC_035159.1:8757-9442 NC_035159.1:8804-9256 NC_035159.1:8808-9104
1 1 1 1 1 1 1
NC_035159.1:8808-9856 NC_035159.1:8819-9975 NC_035159.1:8897-9390 NC_035159.1:8908-10196 NC_035159.1:8981-9014 NC_035159.1:8984-9217 NC_035159.1:8988-9181
1 1 1 1 1 1 1
NC_035159.1:8990-9224 NC_035159.1:8994-9094 NC_035159.1:9-220 NC_035159.1:9001-9088 NC_035159.1:9005-9188 NC_035159.1:9019-9282 NC_035159.1:9022-9080
1 1 1 1 1 1 1
NC_035159.1:9040-9197 NC_035159.1:9040-9234 NC_035159.1:9058-9181 NC_035159.1:9069-9225 NC_035159.1:9071-9157 NC_035159.1:910-1376 NC_035159.1:9164-9280
1 1 1 1 1 1 1
NC_035159.1:927-1303 NC_035159.1:9272-9339 NC_035159.1:9274-12128 NC_035159.1:9329-9419 NC_035159.1:9374-11878 NC_035159.1:9398-9526 NC_035159.1:9469-9528
1 1 1 1 1 1 1
NC_035159.1:9489-11210 NC_035159.1:9520-9778 NC_035159.1:9524-9948 NC_035159.1:9530-9556 NC_035159.1:9538-9560 NC_035159.1:9538-9628 NC_035159.1:9539-9614
1 1 1 1 1 1 1
NC_035159.1:9542-9725 NC_035159.1:9548-9653 NC_035159.1:9550-9690 NC_035159.1:9558-9639 NC_035159.1:9582-9638 NC_035159.1:9603-9702 NC_035159.1:9611-9979
1 1 1 1 1 1 1
NC_035159.1:9621-10162 NC_035159.1:9665-9870 <NA> <NA> <NA> <NA> <NA>
1 1
<NA> <NA> <NA> <NA> <NA> <NA> <NA>
<NA> <NA> <NA> <NA> <NA> <NA> <NA>
<NA> <NA> <NA> <NA> <NA> <NA> <NA>
<NA> <NA> <NA> <NA> <NA> <NA> <NA>
combined_list_of_regions_ID %>% dplyr::group_by(pos_chr_start_end) %>% dplyr::mutate(counts_seqs=n()) %>% dplyr::filter(counts_seqs > 1)
library(ggplot2)
pp <- ggplot(combined_list_of_regions_ID %>% dplyr::group_by(pos_chr_start_end) %>% dplyr::arrange(.by_group = TRUE) %>% dplyr::mutate(counts_seqs=n()) %>% dplyr::filter(counts_seqs > 1),
aes(x = pos_chr_start_end,
y = as.factor(chrom),
color=pos_chr_start_end)) +
#geom_histogram(stat="count") + #binwidth = 1) +
#geom_point(color="red") +
geom_point(position = position_jitter(width = 0.25,height = 0.25,seed = 123456),size=3,alpha=0.5) +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
#scale_y_discrete(chrom) +
#coord_flip() +
#cut("chr") +
#geom_text(stat = "count", aes(label = ifelse(count >= 2, count, "")), vjust = -0.25) +
xlab("Position") +
ylab("Count") +
facet_wrap(~ID,ncol=1) +
ggtitle("Points of NUMT(s) Positions")
pp
dev.off()
null device
1
write.table(combined_list_of_regions_ID %>% dplyr::group_by(pos_chr_start_end) %>% dplyr::arrange(.by_group = TRUE) %>% dplyr::mutate(counts_seqs=n()),file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/Postions_in_CHR_of_numts.csv",sep = ",")
library(plyranges)
names(list_of_regions)
[1] "WGS_017_E20_F03" "WGS_021_E20_F02" "WGS_024_E20_F02" "WGS_025_E20_F02" "WGS_038_E20_F07" "WGS_040_E20_F02" "WGS_042_E20_F02" "WGS_043_E20_F02" "WGS_047_E20_F02" "WGS_050_E20_F02"
[11] "WGS_050_E20_F03" "WGS_060_E20_F06" "WGS_063_E20_F02" "WGS_080_E20_F02" "WGS_084_E20_F02" "WGS_088_E20_F05" "WGS_088_E20_F06" "WGS_095_E20_F11" "WGS_115_E20_F03" "WGS_132_E20_F10"
[21] "WGS_134_E20_F02" "WGS_211_E20_F02" "WGS_220_E20_F07" "WGS_230_E20_F06" "WGS_240_E20_F05" "WGS_246_E20_F05" "WGS_246_E20_F07" "WGS_249_E20_F02" "WGS_252_E20_F02" "WGS_253_E20_F01"
[31] "WGS_262_E20_F07" "WGS_266_E20_F04" "WGS_289_E20_F02" "WGS_299_E20_F09" "WGS_300_E20_F05"
list_of_regions_ID
[[1]]
[[2]]
[[3]]
[[4]]
[[5]]
[[6]]
[[7]]
[[8]]
[[9]]
[[10]]
[[11]]
[[12]]
[[13]]
[[14]]
[[15]]
[[16]]
[[17]]
[[18]]
[[19]]
[[20]]
[[21]]
[[22]]
[[23]]
[[24]]
[[25]]
[[26]]
[[27]]
[[28]]
[[29]]
[[30]]
[[31]]
[[32]]
[[33]]
[[34]]
[[35]]
#https://bioconductor.org/packages/devel/bioc/vignettes/GenomicFeatures/inst/doc/GenomicFeatures.html
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(GenomicRanges)
#Downloaded from NCBI
gff_ae_txdb <- makeTxDbFromGFF(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/GCF_002204515.2_AaegL5.0_genomic.gff",format = "gff3",dataSource = "NCBI",organism = "Aedes aegypti",)
Import genomic features from the file as a GRanges object ... Warning in for (k in seq_along(lens)) { :
closing unused connection 3 (/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Data/GCF_002204515.2_AaegL5.0_genomic.gff)
OK
Prepare the 'metadata' data frame ... OK
Make the TxDb object ... Warning in .extract_transcripts_from_GRanges(tx_IDX, gr, mcols0$type, mcols0$ID, :
some transcripts have no "transcript_id" attribute ==> their name ("tx_name" column in the TxDb object) was set to NA
Warning in .extract_transcripts_from_GRanges(tx_IDX, gr, mcols0$type, mcols0$ID, :
the transcript names ("tx_name" column in the TxDb object) imported from the "transcript_id" attribute are not unique
OK
#This were selected using cut,awk,grep by command line
genes(gff_ae_txdb)
GRanges object with 19623 ranges and 1 metadata column:
seqnames ranges strand | gene_id
<Rle> <IRanges> <Rle> | <character>
CFI06_mgp01 NC_035159.1 11547-12488 - | CFI06_mgp01
CFI06_mgp02 NC_035159.1 10323-11457 + | CFI06_mgp02
CFI06_mgp03 NC_035159.1 9798-10319 + | CFI06_mgp03
CFI06_mgp04 NC_035159.1 9364-9660 - | CFI06_mgp04
CFI06_mgp05 NC_035159.1 8027-9370 - | CFI06_mgp05
... ... ... ... . ...
Trnay-gua-5 NC_035108.1 14298210-14298304 + | Trnay-gua-5
Trnay-gua-6 NC_035108.1 15183501-15183595 + | Trnay-gua-6
Trnay-gua-7 NC_035108.1 47870658-47870755 + | Trnay-gua-7
Trnay-gua-8 NC_035108.1 47871172-47871265 + | Trnay-gua-8
Trnay-gua-9 NC_035108.1 47934762-47934891 - | Trnay-gua-9
-------
seqinfo: 798 sequences from an unspecified genome; no seqlengths
head(seqlevels(gff_ae_txdb))
[1] "NC_035107.1" "NC_035108.1" "NC_035109.1" "NC_035159.1" "NW_018734409.1" "NW_018734412.1"
#select only mt
columns(gff_ae_txdb)
[1] "CDSCHROM" "CDSEND" "CDSID" "CDSNAME" "CDSPHASE" "CDSSTART" "CDSSTRAND" "EXONCHROM" "EXONEND" "EXONID" "EXONNAME" "EXONRANK" "EXONSTART" "EXONSTRAND"
[15] "GENEID" "TXCHROM" "TXEND" "TXID" "TXNAME" "TXSTART" "TXSTRAND" "TXTYPE"
seqlevels(gff_ae_txdb) <- "NC_035159.1"
#from the fasta /locus_tag="CFI06_mgp11" /db_xref="GeneID:33307558" CDS 2903..3587 /gene="COX2"
#cox2,atp8,atp6,cox3
#gene names are not correctly extrracted ffrom the gff file
keys_2_aaeg <- c("CFI06_mgp12","CFI06_mgp10","CFI06_mgp09","CFI06_mgp08")
keytypes(gff_ae_txdb)
[1] "CDSID" "CDSNAME" "EXONID" "EXONNAME" "GENEID" "TXID" "TXNAME"
columns(gff_ae_txdb)
[1] "CDSCHROM" "CDSEND" "CDSID" "CDSNAME" "CDSPHASE" "CDSSTART" "CDSSTRAND" "EXONCHROM" "EXONEND" "EXONID" "EXONNAME" "EXONRANK" "EXONSTART" "EXONSTRAND"
[15] "GENEID" "TXCHROM" "TXEND" "TXID" "TXNAME" "TXSTART" "TXSTRAND" "TXTYPE"
keys(gff_ae_txdb)
[1] "CFI06_mgp01" "CFI06_mgp02" "CFI06_mgp03" "CFI06_mgp04" "CFI06_mgp05" "CFI06_mgp06" "CFI06_mgp07" "CFI06_mgp08" "CFI06_mgp09" "CFI06_mgp10" "CFI06_mgp11" "CFI06_mgp12"
[13] "CFI06_mgp13" "CFI06_mgr01" "CFI06_mgr02" "CFI06_mgt01" "CFI06_mgt02" "CFI06_mgt03" "CFI06_mgt04" "CFI06_mgt05" "CFI06_mgt06" "CFI06_mgt07" "CFI06_mgt08" "CFI06_mgt09"
[25] "CFI06_mgt10" "CFI06_mgt11" "CFI06_mgt12" "CFI06_mgt13" "CFI06_mgt14" "CFI06_mgt15" "CFI06_mgt16" "CFI06_mgt17" "CFI06_mgt18" "CFI06_mgt19" "CFI06_mgt20" "CFI06_mgt21"
[37] "CFI06_mgt22" "CFI06_mgt23" "LOC110673976" "LOC110673977" "LOC110673978" "LOC110673979" "LOC110673980" "LOC110673981" "LOC110673982" "LOC110673983" "LOC110673984" "LOC110673985"
[49] "LOC110673986" "LOC110673987" "LOC110673988" "LOC110673989" "LOC110673990" "LOC110673991" "LOC110673992" "LOC110673993" "LOC110673994" "LOC110673995" "LOC110673996" "LOC110673997"
[61] "LOC110673998" "LOC110673999" "LOC110674000" "LOC110674001" "LOC110674002" "LOC110674003" "LOC110674004" "LOC110674005" "LOC110674006" "LOC110674007" "LOC110674008" "LOC110674009"
[73] "LOC110674010" "LOC110674011" "LOC110674012" "LOC110674013" "LOC110674014" "LOC110674015" "LOC110674016" "LOC110674017" "LOC110674018" "LOC110674019" "LOC110674020" "LOC110674021"
[85] "LOC110674022" "LOC110674023" "LOC110674024" "LOC110674025" "LOC110674026" "LOC110674027" "LOC110674028" "LOC110674029" "LOC110674030" "LOC110674031" "LOC110674032" "LOC110674033"
[97] "LOC110674034" "LOC110674035" "LOC110674036" "LOC110674037" "LOC110674038" "LOC110674039" "LOC110674040" "LOC110674041" "LOC110674042" "LOC110674043" "LOC110674044" "LOC110674045"
[109] "LOC110674046" "LOC110674047" "LOC110674048" "LOC110674049" "LOC110674050" "LOC110674051" "LOC110674052" "LOC110674053" "LOC110674054" "LOC110674055" "LOC110674056" "LOC110674057"
[121] "LOC110674058" "LOC110674059" "LOC110674060" "LOC110674061" "LOC110674062" "LOC110674063" "LOC110674064" "LOC110674065" "LOC110674066" "LOC110674067" "LOC110674068" "LOC110674069"
[133] "LOC110674070" "LOC110674071" "LOC110674072" "LOC110674073" "LOC110674074" "LOC110674075" "LOC110674076" "LOC110674077" "LOC110674078" "LOC110674079" "LOC110674080" "LOC110674081"
[145] "LOC110674082" "LOC110674083" "LOC110674084" "LOC110674085" "LOC110674086" "LOC110674087" "LOC110674088" "LOC110674089" "LOC110674090" "LOC110674091" "LOC110674092" "LOC110674093"
[157] "LOC110674094" "LOC110674095" "LOC110674096" "LOC110674097" "LOC110674098" "LOC110674099" "LOC110674100" "LOC110674101" "LOC110674102" "LOC110674103" "LOC110674104" "LOC110674105"
[169] "LOC110674106" "LOC110674107" "LOC110674108" "LOC110674109" "LOC110674110" "LOC110674111" "LOC110674112" "LOC110674113" "LOC110674114" "LOC110674115" "LOC110674116" "LOC110674117"
[181] "LOC110674118" "LOC110674119" "LOC110674120" "LOC110674121" "LOC110674122" "LOC110674123" "LOC110674124" "LOC110674125" "LOC110674126" "LOC110674127" "LOC110674128" "LOC110674129"
[193] "LOC110674130" "LOC110674131" "LOC110674132" "LOC110674133" "LOC110674134" "LOC110674135" "LOC110674136" "LOC110674137" "LOC110674138" "LOC110674139" "LOC110674140" "LOC110674141"
[205] "LOC110674142" "LOC110674143" "LOC110674144" "LOC110674145" "LOC110674146" "LOC110674147" "LOC110674148" "LOC110674149" "LOC110674150" "LOC110674151" "LOC110674152" "LOC110674153"
[217] "LOC110674154" "LOC110674155" "LOC110674156" "LOC110674157" "LOC110674158" "LOC110674159" "LOC110674160" "LOC110674161" "LOC110674162" "LOC110674163" "LOC110674164" "LOC110674165"
[229] "LOC110674166" "LOC110674167" "LOC110674168" "LOC110674169" "LOC110674170" "LOC110674171" "LOC110674172" "LOC110674173" "LOC110674174" "LOC110674175" "LOC110674176" "LOC110674177"
[241] "LOC110674178" "LOC110674179" "LOC110674180" "LOC110674181" "LOC110674182" "LOC110674183" "LOC110674184" "LOC110674185" "LOC110674186" "LOC110674187" "LOC110674188" "LOC110674189"
[253] "LOC110674190" "LOC110674191" "LOC110674192" "LOC110674193" "LOC110674194" "LOC110674195" "LOC110674196" "LOC110674197" "LOC110674198" "LOC110674199" "LOC110674200" "LOC110674201"
[265] "LOC110674202" "LOC110674203" "LOC110674204" "LOC110674205" "LOC110674206" "LOC110674207" "LOC110674208" "LOC110674209" "LOC110674210" "LOC110674211" "LOC110674212" "LOC110674213"
[277] "LOC110674214" "LOC110674215" "LOC110674216" "LOC110674217" "LOC110674218" "LOC110674219" "LOC110674220" "LOC110674221" "LOC110674222" "LOC110674223" "LOC110674224" "LOC110674225"
[289] "LOC110674226" "LOC110674227" "LOC110674228" "LOC110674229" "LOC110674230" "LOC110674231" "LOC110674232" "LOC110674233" "LOC110674234" "LOC110674235" "LOC110674236" "LOC110674237"
[301] "LOC110674238" "LOC110674239" "LOC110674240" "LOC110674241" "LOC110674242" "LOC110674243" "LOC110674244" "LOC110674245" "LOC110674246" "LOC110674247" "LOC110674248" "LOC110674249"
[313] "LOC110674250" "LOC110674251" "LOC110674252" "LOC110674253" "LOC110674254" "LOC110674255" "LOC110674256" "LOC110674257" "LOC110674258" "LOC110674259" "LOC110674260" "LOC110674261"
[325] "LOC110674262" "LOC110674263" "LOC110674264" "LOC110674265" "LOC110674266" "LOC110674267" "LOC110674268" "LOC110674269" "LOC110674270" "LOC110674271" "LOC110674272" "LOC110674273"
[337] "LOC110674274" "LOC110674275" "LOC110674276" "LOC110674277" "LOC110674278" "LOC110674279" "LOC110674280" "LOC110674281" "LOC110674282" "LOC110674283" "LOC110674284" "LOC110674285"
[349] "LOC110674286" "LOC110674287" "LOC110674288" "LOC110674289" "LOC110674290" "LOC110674291" "LOC110674292" "LOC110674293" "LOC110674294" "LOC110674295" "LOC110674296" "LOC110674297"
[361] "LOC110674298" "LOC110674299" "LOC110674300" "LOC110674301" "LOC110674302" "LOC110674303" "LOC110674304" "LOC110674305" "LOC110674306" "LOC110674307" "LOC110674308" "LOC110674309"
[373] "LOC110674310" "LOC110674311" "LOC110674312" "LOC110674313" "LOC110674314" "LOC110674315" "LOC110674316" "LOC110674317" "LOC110674318" "LOC110674319" "LOC110674320" "LOC110674321"
[385] "LOC110674322" "LOC110674323" "LOC110674324" "LOC110674325" "LOC110674326" "LOC110674327" "LOC110674328" "LOC110674329" "LOC110674330" "LOC110674331" "LOC110674332" "LOC110674333"
[397] "LOC110674334" "LOC110674335" "LOC110674336" "LOC110674337" "LOC110674338" "LOC110674339" "LOC110674340" "LOC110674341" "LOC110674342" "LOC110674343" "LOC110674344" "LOC110674345"
[409] "LOC110674346" "LOC110674347" "LOC110674348" "LOC110674349" "LOC110674350" "LOC110674351" "LOC110674352" "LOC110674353" "LOC110674354" "LOC110674355" "LOC110674356" "LOC110674357"
[421] "LOC110674358" "LOC110674359" "LOC110674360" "LOC110674361" "LOC110674362" "LOC110674363" "LOC110674364" "LOC110674365" "LOC110674366" "LOC110674367" "LOC110674368" "LOC110674369"
[433] "LOC110674370" "LOC110674371" "LOC110674372" "LOC110674373" "LOC110674374" "LOC110674375" "LOC110674376" "LOC110674377" "LOC110674378" "LOC110674379" "LOC110674380" "LOC110674381"
[445] "LOC110674382" "LOC110674383" "LOC110674384" "LOC110674385" "LOC110674386" "LOC110674387" "LOC110674388" "LOC110674389" "LOC110674390" "LOC110674391" "LOC110674392" "LOC110674393"
[457] "LOC110674394" "LOC110674395" "LOC110674396" "LOC110674397" "LOC110674398" "LOC110674399" "LOC110674400" "LOC110674401" "LOC110674402" "LOC110674403" "LOC110674404" "LOC110674405"
[469] "LOC110674406" "LOC110674407" "LOC110674408" "LOC110674409" "LOC110674410" "LOC110674411" "LOC110674412" "LOC110674413" "LOC110674414" "LOC110674415" "LOC110674416" "LOC110674417"
[481] "LOC110674418" "LOC110674419" "LOC110674420" "LOC110674421" "LOC110674422" "LOC110674423" "LOC110674424" "LOC110674425" "LOC110674426" "LOC110674427" "LOC110674428" "LOC110674429"
[493] "LOC110674430" "LOC110674431" "LOC110674432" "LOC110674433" "LOC110674434" "LOC110674435" "LOC110674436" "LOC110674437" "LOC110674438" "LOC110674439" "LOC110674440" "LOC110674441"
[505] "LOC110674442" "LOC110674443" "LOC110674444" "LOC110674445" "LOC110674446" "LOC110674447" "LOC110674448" "LOC110674449" "LOC110674450" "LOC110674451" "LOC110674452" "LOC110674453"
[517] "LOC110674454" "LOC110674455" "LOC110674456" "LOC110674457" "LOC110674458" "LOC110674459" "LOC110674460" "LOC110674461" "LOC110674462" "LOC110674463" "LOC110674464" "LOC110674465"
[529] "LOC110674466" "LOC110674467" "LOC110674468" "LOC110674469" "LOC110674470" "LOC110674471" "LOC110674472" "LOC110674473" "LOC110674474" "LOC110674475" "LOC110674476" "LOC110674477"
[541] "LOC110674478" "LOC110674479" "LOC110674480" "LOC110674481" "LOC110674482" "LOC110674483" "LOC110674484" "LOC110674485" "LOC110674486" "LOC110674487" "LOC110674488" "LOC110674489"
[553] "LOC110674490" "LOC110674491" "LOC110674492" "LOC110674493" "LOC110674494" "LOC110674495" "LOC110674496" "LOC110674497" "LOC110674498" "LOC110674499" "LOC110674500" "LOC110674501"
[565] "LOC110674502" "LOC110674503" "LOC110674504" "LOC110674505" "LOC110674506" "LOC110674507" "LOC110674508" "LOC110674509" "LOC110674510" "LOC110674511" "LOC110674512" "LOC110674513"
[577] "LOC110674514" "LOC110674515" "LOC110674516" "LOC110674517" "LOC110674518" "LOC110674519" "LOC110674520" "LOC110674521" "LOC110674522" "LOC110674523" "LOC110674524" "LOC110674525"
[589] "LOC110674526" "LOC110674527" "LOC110674528" "LOC110674529" "LOC110674530" "LOC110674531" "LOC110674532" "LOC110674533" "LOC110674534" "LOC110674535" "LOC110674536" "LOC110674537"
[601] "LOC110674538" "LOC110674539" "LOC110674540" "LOC110674541" "LOC110674542" "LOC110674543" "LOC110674544" "LOC110674545" "LOC110674546" "LOC110674547" "LOC110674548" "LOC110674549"
[613] "LOC110674550" "LOC110674551" "LOC110674552" "LOC110674553" "LOC110674554" "LOC110674555" "LOC110674556" "LOC110674557" "LOC110674558" "LOC110674559" "LOC110674560" "LOC110674561"
[625] "LOC110674562" "LOC110674563" "LOC110674564" "LOC110674565" "LOC110674566" "LOC110674567" "LOC110674568" "LOC110674569" "LOC110674570" "LOC110674571" "LOC110674572" "LOC110674573"
[637] "LOC110674574" "LOC110674575" "LOC110674576" "LOC110674577" "LOC110674578" "LOC110674579" "LOC110674580" "LOC110674581" "LOC110674582" "LOC110674583" "LOC110674584" "LOC110674585"
[649] "LOC110674586" "LOC110674587" "LOC110674588" "LOC110674589" "LOC110674590" "LOC110674591" "LOC110674592" "LOC110674593" "LOC110674594" "LOC110674595" "LOC110674596" "LOC110674597"
[661] "LOC110674598" "LOC110674599" "LOC110674600" "LOC110674601" "LOC110674602" "LOC110674603" "LOC110674604" "LOC110674605" "LOC110674606" "LOC110674607" "LOC110674608" "LOC110674609"
[673] "LOC110674610" "LOC110674611" "LOC110674612" "LOC110674613" "LOC110674614" "LOC110674615" "LOC110674616" "LOC110674617" "LOC110674618" "LOC110674619" "LOC110674620" "LOC110674621"
[685] "LOC110674622" "LOC110674623" "LOC110674624" "LOC110674625" "LOC110674626" "LOC110674627" "LOC110674628" "LOC110674629" "LOC110674630" "LOC110674631" "LOC110674632" "LOC110674633"
[697] "LOC110674634" "LOC110674635" "LOC110674636" "LOC110674637" "LOC110674638" "LOC110674639" "LOC110674640" "LOC110674641" "LOC110674642" "LOC110674643" "LOC110674644" "LOC110674645"
[709] "LOC110674646" "LOC110674647" "LOC110674648" "LOC110674649" "LOC110674650" "LOC110674651" "LOC110674652" "LOC110674653" "LOC110674654" "LOC110674655" "LOC110674656" "LOC110674657"
[721] "LOC110674658" "LOC110674659" "LOC110674660" "LOC110674661" "LOC110674662" "LOC110674663" "LOC110674664" "LOC110674665" "LOC110674666" "LOC110674667" "LOC110674668" "LOC110674669"
[733] "LOC110674670" "LOC110674671" "LOC110674672" "LOC110674673" "LOC110674674" "LOC110674675" "LOC110674676" "LOC110674677" "LOC110674678" "LOC110674679" "LOC110674680" "LOC110674681"
[745] "LOC110674682" "LOC110674683" "LOC110674684" "LOC110674685" "LOC110674686" "LOC110674687" "LOC110674688" "LOC110674689" "LOC110674690" "LOC110674691" "LOC110674692" "LOC110674693"
[757] "LOC110674694" "LOC110674695" "LOC110674696" "LOC110674697" "LOC110674698" "LOC110674699" "LOC110674700" "LOC110674701" "LOC110674702" "LOC110674703" "LOC110674704" "LOC110674705"
[769] "LOC110674706" "LOC110674707" "LOC110674708" "LOC110674709" "LOC110674710" "LOC110674711" "LOC110674712" "LOC110674713" "LOC110674714" "LOC110674715" "LOC110674716" "LOC110674717"
[781] "LOC110674718" "LOC110674719" "LOC110674720" "LOC110674721" "LOC110674722" "LOC110674723" "LOC110674724" "LOC110674725" "LOC110674726" "LOC110674727" "LOC110674728" "LOC110674729"
[793] "LOC110674730" "LOC110674731" "LOC110674732" "LOC110674733" "LOC110674734" "LOC110674735" "LOC110674736" "LOC110674737" "LOC110674738" "LOC110674739" "LOC110674740" "LOC110674741"
[805] "LOC110674742" "LOC110674743" "LOC110674744" "LOC110674745" "LOC110674746" "LOC110674747" "LOC110674748" "LOC110674749" "LOC110674750" "LOC110674751" "LOC110674752" "LOC110674753"
[817] "LOC110674754" "LOC110674755" "LOC110674756" "LOC110674757" "LOC110674758" "LOC110674759" "LOC110674760" "LOC110674761" "LOC110674762" "LOC110674763" "LOC110674764" "LOC110674765"
[829] "LOC110674766" "LOC110674767" "LOC110674768" "LOC110674769" "LOC110674770" "LOC110674771" "LOC110674772" "LOC110674773" "LOC110674774" "LOC110674775" "LOC110674776" "LOC110674777"
[841] "LOC110674778" "LOC110674779" "LOC110674780" "LOC110674781" "LOC110674782" "LOC110674783" "LOC110674784" "LOC110674785" "LOC110674786" "LOC110674787" "LOC110674788" "LOC110674789"
[853] "LOC110674790" "LOC110674791" "LOC110674792" "LOC110674793" "LOC110674794" "LOC110674795" "LOC110674796" "LOC110674797" "LOC110674798" "LOC110674799" "LOC110674800" "LOC110674801"
[865] "LOC110674802" "LOC110674803" "LOC110674804" "LOC110674805" "LOC110674806" "LOC110674807" "LOC110674808" "LOC110674809" "LOC110674810" "LOC110674811" "LOC110674812" "LOC110674813"
[877] "LOC110674814" "LOC110674815" "LOC110674816" "LOC110674817" "LOC110674818" "LOC110674819" "LOC110674820" "LOC110674821" "LOC110674822" "LOC110674823" "LOC110674824" "LOC110674825"
[889] "LOC110674826" "LOC110674827" "LOC110674828" "LOC110674829" "LOC110674830" "LOC110674831" "LOC110674832" "LOC110674833" "LOC110674834" "LOC110674835" "LOC110674836" "LOC110674837"
[901] "LOC110674838" "LOC110674839" "LOC110674840" "LOC110674841" "LOC110674842" "LOC110674843" "LOC110674844" "LOC110674845" "LOC110674846" "LOC110674847" "LOC110674848" "LOC110674849"
[913] "LOC110674850" "LOC110674851" "LOC110674852" "LOC110674853" "LOC110674854" "LOC110674855" "LOC110674856" "LOC110674857" "LOC110674858" "LOC110674859" "LOC110674860" "LOC110674861"
[925] "LOC110674862" "LOC110674863" "LOC110674864" "LOC110674865" "LOC110674866" "LOC110674867" "LOC110674868" "LOC110674869" "LOC110674870" "LOC110674871" "LOC110674872" "LOC110674873"
[937] "LOC110674874" "LOC110674875" "LOC110674876" "LOC110674877" "LOC110674878" "LOC110674879" "LOC110674880" "LOC110674881" "LOC110674882" "LOC110674883" "LOC110675039" "LOC110675050"
[949] "LOC110675094" "LOC110675095" "LOC110675096" "LOC110675097" "LOC110675098" "LOC110675099" "LOC110675100" "LOC110675101" "LOC110675102" "LOC110675103" "LOC110675104" "LOC110675105"
[961] "LOC110675106" "LOC110675107" "LOC110675108" "LOC110675109" "LOC110675110" "LOC110675111" "LOC110675112" "LOC110675113" "LOC110675114" "LOC110675115" "LOC110675116" "LOC110675117"
[973] "LOC110675118" "LOC110675119" "LOC110675120" "LOC110675121" "LOC110675122" "LOC110675123" "LOC110675124" "LOC110675125" "LOC110675126" "LOC110675127" "LOC110675128" "LOC110675129"
[985] "LOC110675130" "LOC110675131" "LOC110675132" "LOC110675133" "LOC110675134" "LOC110675135" "LOC110675136" "LOC110675137" "LOC110675138" "LOC110675139" "LOC110675140" "LOC110675141"
[997] "LOC110675142" "LOC110675143" "LOC110675144" "LOC110675145"
[ reached getOption("max.print") -- omitted 18623 entries ]
AnnotationDbi::select(gff_ae_txdb,keys = keys_2_aaeg,columns=columns(gff_ae_txdb), keytype="GENEID")
'select()' returned 1:1 mapping between keys and columns
#AnnotationDbi::select(gff_ae_txdb,keys = keys_2_aaeg,columns=c("TXSTART","TXEND","TXNAME","CDSNAME","EXONNAME"), keytype="GENEID")
#retrieve all the transcripts from mitochondrial genome using this function as a granges
gr_aaeg_transcripts <- transcripts(gff_ae_txdb)
gr_aaeg_genes <- genes(gff_ae_txdb)
gr_aaeg_genes[1:3]
GRanges object with 3 ranges and 1 metadata column:
seqnames ranges strand | gene_id
<Rle> <IRanges> <Rle> | <character>
CFI06_mgp01 NC_035159.1 11547-12488 - | CFI06_mgp01
CFI06_mgp02 NC_035159.1 10323-11457 + | CFI06_mgp02
CFI06_mgp03 NC_035159.1 9798-10319 + | CFI06_mgp03
-------
seqinfo: 1 sequence from an unspecified genome; no seqlengths
gr_aaeg_transcripts[1:3]
GRanges object with 3 ranges and 2 metadata columns:
seqnames ranges strand | tx_id tx_name
<Rle> <IRanges> <Rle> | <integer> <character>
[1] NC_035159.1 1-69 + | 32449 <NA>
[2] NC_035159.1 70-1095 + | 32450 <NA>
[3] NC_035159.1 1096-1165 + | 32451 <NA>
-------
seqinfo: 1 sequence from an unspecified genome; no seqlengths
### Create list of granges to count overlaps of the samples for the different numts.
lists_of_granges_numts_chr_detail <- lapply(list_of_regions_ID,function(df) {
#grange_list <- list()
#grange_list[[length(grange_list) + 1]]
df %>% dplyr::mutate(pos_chr_start_end = paste0("chrM:",start,"-",end,"-origin-",chrom)) %>% dplyr::select(seqnames,start,end,strand,chrom,ID,pos_chr_start_end) %>% plyranges::as_granges()})
#grange_list <-
#return(grange_list)
#Cerate a list of ranges to overlap to the genes in the CHR MT of Aedes aegypti
granges_list_aeag <- GRangesList(lists_of_granges_numts_chr_detail)
##find overlaps using plyranges (tutorial is really helpful)
gr_aaeg_genes <- sort(gr_aaeg_genes)
result_lists_aaeg <- list()
result_lists_aaeg_genes_hit <- list()
for (i in seq_along(1:length(granges_list_aeag))){
print(i)
result_lists_aaeg[[mcols(granges_list_aeag[[i]])[[2]][1]]] <- granges_list_aeag[[i]] %>% join_overlap_inner(gr_aaeg_genes)
result_lists_aaeg_genes_hit[[mcols(granges_list_aeag[[i]])[[2]][1]]] <- result_lists_aaeg[[i]] %>% as.tibble() %>%
dplyr::group_by(gene_id) %>%
dplyr::summarise(numts_hitting_X_time_the_gene=n())
}
[1] 1
Warning: `as.tibble()` was deprecated in tibble 2.0.0.
ℹ Please use `as_tibble()` instead.
ℹ The signature and semantics have changed, see `?as_tibble`.
This warning is displayed once every 8 hours.
Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
# result_lists_aaeg
# result_lists_aaeg_genes_hit
converter_g <- read.table(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Data/genes_dbxref.csv",sep = ",")
converter_t <- read.table(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Data/trnas_dbxref.csv",sep = ",")
converter_t <- converter_t[,c(2,1)]
names(converter_t) <- c("V1","V2")
converter_gt <- rbind(converter_g,converter_t)
names(converter_gt) <- c("symbol","dbxref")
### Convert the dbxref genen names to symbols
result_lists_aaeg_genes_hit_with_symbol <- lapply(result_lists_aaeg_genes_hit,function(nn) nn %>% dplyr::left_join(converter_gt,by = c("gene_id" = "dbxref")))
result_lists_aaeg_genes_hit_with_symbol_counts <- lapply(result_lists_aaeg_genes_hit_with_symbol,function(cc) cc %>% dplyr::group_by(symbol) %>% dplyr::summarize(count=dplyr::n(),col2_sum=sum(numts_hitting_X_time_the_gene)))
result_lists_aaeg_genes_hit_with_symbol_counts
$WGS_017_E20_F03
$WGS_021_E20_F02
$WGS_024_E20_F02
$WGS_025_E20_F02
$WGS_038_E20_F07
$WGS_040_E20_F02
$WGS_042_E20_F02
$WGS_043_E20_F02
$WGS_047_E20_F02
$WGS_050_E20_F02
$WGS_050_E20_F03
$WGS_060_E20_F06
$WGS_063_E20_F02
$WGS_080_E20_F02
$WGS_084_E20_F02
$WGS_088_E20_F05
$WGS_088_E20_F06
$WGS_095_E20_F11
$WGS_115_E20_F03
$WGS_132_E20_F10
$WGS_134_E20_F02
$WGS_211_E20_F02
$WGS_220_E20_F07
$WGS_230_E20_F06
$WGS_240_E20_F05
$WGS_246_E20_F05
$WGS_246_E20_F07
$WGS_249_E20_F02
$WGS_252_E20_F02
$WGS_253_E20_F01
$WGS_262_E20_F07
$WGS_266_E20_F04
$WGS_289_E20_F02
$WGS_299_E20_F09
$WGS_300_E20_F05
NA
#mm="WGS_088_E20_F05"
# result_lists_aaeg_genes_hit_with_symbol_counts[[""]]
lapply(names(result_lists_aaeg_genes_hit_with_symbol_counts),function(mm) {
print(paste0("plotting for sample", mm))
#png(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/Visualization_samples/R_CIRCOS_RIL/",df[["V2"]][1],".png"),width = 680,height = 680)
#svg(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/RIL_mtDNA_overlap_numts_",mm,"_circos.svg"),width = 9,height = 9)
mt_genes_ae_2 <- mt_genes_ae
mt_genes_ae_2$start <- mt_genes_ae_2$start/100000
mt_genes_ae_2$end <- mt_genes_ae_2$end/100000
numts_hits_df_merged_counted_start_end <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>%
dplyr::right_join(result_lists_aaeg_genes_hit_with_symbol_counts[[mm]],by = c("gene"="symbol")) %>%
dplyr::mutate(value=col2_sum) %>%
dplyr::select(chr,start,end,value,gene) %>%
dplyr::arrange(start,end)
numts_hits_df_merged_counted_start_end$value_scaled <- numts_hits_df_merged_counted_start_end$value/10
library(circlize)
ref_fd_ae <- data.frame("Chromosome"=c("chrM"),"ChromStart"=c(1),"Chromend"=c(16790))
#circos.genomicInitialize(ref_fd_ae)
circos.genomicInitialize(ref_fd_ae,plotType = NULL)
circos.genomicLabels(mt_genes_ae_2 ,
labels.column = 5,
cex=0.7,line_lwd=0.6, line_col="grey20",
connection_height = 0.019,
col=ifelse(mt_genes_ae_2$gene %in% numts_hits_df_merged_counted_start_end$gene[numts_hits_df_merged_counted_start_end$value_scaled > mean(numts_hits_df_merged_counted_start_end$value_scaled)],"red","black"),
labels_height = 0.2,niceFacing = TRUE,side = "outside")
# circos.genomicLabels(mt_genes_2 ,labels.column = 5, cex=0.5,line_lwd=0.6, line_col="grey20", connection_height = 0.05, labels_height = 0.2,niceFacing = TRUE,side = "outside")
circos.track(ylim=c(0, 1), panel.fun=function(x, y) {
chr=CELL_META$sector.index
xlim=CELL_META$xlim
ylim=CELL_META$ylim#
circos.text(mean(xlim), mean(ylim), chr, cex=0.8)}, bg.col = c("white"), bg.border=F, track.height=0.2) # the track height of the text specifies how thick is the track were we plot the bars
circos.track(track.index = get.current.track.index(),
track.height=0.8,
ylim=c(0,1), #this track y lim specifies how big are the bars up to in the track y lim
panel.fun = function(x, y) {
circos.genomicAxis(h = "bottom", direction = "inside",labels.cex = 0.6,tickLabelsStartFromZero = FALSE)})
# set_track_gap(gap = 0.05)
# ddff <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>% dplyr::right_join(result_lists_aaeg_genes_hit_with_symbol_counts$WGS_211_E20_F02,by = c("gene"="symbol")) %>%dplyr::mutate(value=col2_sum) %>% dplyr::select(chr,start,end,value,gene) %>% dplyr::arrange(start,end)
#
# ddff_2 <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>% dplyr::right_join(result_lists_aaeg_genes_hit_with_symbol_counts$WGS_038_E20_F07,by = c("gene"="symbol")) %>% dplyr::mutate(value=col2_sum) %>% dplyr::select(chr,start,end,value,gene) %>% dplyr::arrange(start,end)
library(RColorBrewer)
green_pal <- colorRampPalette(brewer.pal(9, "Greens"))
map_value_to_color <- function(value) {
breaks <- seq(0,8,0.9)
colors <- green_pal(9)
colors[findInterval(value, breaks)]
}
circos.rect(xleft = numts_hits_df_merged_counted_start_end$start,
ybottom = 0.01,
xright = numts_hits_df_merged_counted_start_end$end,
ytop = 0.02 + numts_hits_df_merged_counted_start_end$value_scaled,
#col = rgb(0.4, 0.7, 0.4, alpha = 0.5),
#col = ("#95D5B2"),
col = map_value_to_color(numts_hits_df_merged_counted_start_end$value_scaled),
#col = my_colors_scaled,
border = "black")
#circos.lines(CELL_META$cell.xlim, c(max(numts_hits_df_merged_counted_start_end$value_scaled)+0.02, max(numts_hits_df_merged_counted_start_end$value_scaled)+0.2),lwd = 2, lty = 2, col = "#A71246")
#circos.lines(CELL_META$cell.xlim, c(min(numts_hits_df_merged_counted_start_end$value)+5, min(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "blue")
circos.lines(CELL_META$cell.xlim, c(mean(numts_hits_df_merged_counted_start_end$value_scaled)+0.01, mean(numts_hits_df_merged_counted_start_end$value_scaled)+0.01),lwd = 2, lty = 2, col = "#03071E")
#dev.off()
#result_lists_aaeg_genes_hit_with_symbol_counts[[mm]]
})
[1] "plotting for sampleWGS_017_E20_F03"
[1] "plotting for sampleWGS_021_E20_F02"
[1] "plotting for sampleWGS_024_E20_F02"
[1] "plotting for sampleWGS_025_E20_F02"
[1] "plotting for sampleWGS_038_E20_F07"
[1] "plotting for sampleWGS_040_E20_F02"
[1] "plotting for sampleWGS_042_E20_F02"
[1] "plotting for sampleWGS_043_E20_F02"
[1] "plotting for sampleWGS_047_E20_F02"
[1] "plotting for sampleWGS_050_E20_F02"
[1] "plotting for sampleWGS_050_E20_F03"
[1] "plotting for sampleWGS_060_E20_F06"
[1] "plotting for sampleWGS_063_E20_F02"
[1] "plotting for sampleWGS_080_E20_F02"
[1] "plotting for sampleWGS_084_E20_F02"
[1] "plotting for sampleWGS_088_E20_F05"
[1] "plotting for sampleWGS_088_E20_F06"
[1] "plotting for sampleWGS_095_E20_F11"
[1] "plotting for sampleWGS_115_E20_F03"
[1] "plotting for sampleWGS_132_E20_F10"
[1] "plotting for sampleWGS_134_E20_F02"
[1] "plotting for sampleWGS_211_E20_F02"
[1] "plotting for sampleWGS_220_E20_F07"
[1] "plotting for sampleWGS_230_E20_F06"
[1] "plotting for sampleWGS_240_E20_F05"
[1] "plotting for sampleWGS_246_E20_F05"
[1] "plotting for sampleWGS_246_E20_F07"
[1] "plotting for sampleWGS_249_E20_F02"
[1] "plotting for sampleWGS_252_E20_F02"
[1] "plotting for sampleWGS_253_E20_F01"
[1] "plotting for sampleWGS_262_E20_F07"
[1] "plotting for sampleWGS_266_E20_F04"
[1] "plotting for sampleWGS_289_E20_F02"
[1] "plotting for sampleWGS_299_E20_F09"
[1] "plotting for sampleWGS_300_E20_F05"
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
[[8]]
NULL
[[9]]
NULL
[[10]]
NULL
[[11]]
NULL
[[12]]
NULL
[[13]]
NULL
[[14]]
NULL
[[15]]
NULL
[[16]]
NULL
[[17]]
NULL
[[18]]
NULL
[[19]]
NULL
[[20]]
NULL
[[21]]
NULL
[[22]]
NULL
[[23]]
NULL
[[24]]
NULL
[[25]]
NULL
[[26]]
NULL
[[27]]
NULL
[[28]]
NULL
[[29]]
NULL
[[30]]
NULL
[[31]]
NULL
[[32]]
NULL
[[33]]
NULL
[[34]]
NULL
[[35]]
NULL
lapply(names(result_lists_aaeg_genes_hit_with_symbol_counts),function(mm) {
print(paste0("saved sample", mm))
write.table(x = result_lists_aaeg_genes_hit_with_symbol_counts[[mm]],
file = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/RIL_mtDNA_overlap_numts_",mm,"_circos_table.csv"),sep = ",")})
[1] "saved sampleWGS_017_E20_F03"
[1] "saved sampleWGS_021_E20_F02"
[1] "saved sampleWGS_024_E20_F02"
[1] "saved sampleWGS_025_E20_F02"
[1] "saved sampleWGS_038_E20_F07"
[1] "saved sampleWGS_040_E20_F02"
[1] "saved sampleWGS_042_E20_F02"
[1] "saved sampleWGS_043_E20_F02"
[1] "saved sampleWGS_047_E20_F02"
[1] "saved sampleWGS_050_E20_F02"
[1] "saved sampleWGS_050_E20_F03"
[1] "saved sampleWGS_060_E20_F06"
[1] "saved sampleWGS_063_E20_F02"
[1] "saved sampleWGS_080_E20_F02"
[1] "saved sampleWGS_084_E20_F02"
[1] "saved sampleWGS_088_E20_F05"
[1] "saved sampleWGS_088_E20_F06"
[1] "saved sampleWGS_095_E20_F11"
[1] "saved sampleWGS_115_E20_F03"
[1] "saved sampleWGS_132_E20_F10"
[1] "saved sampleWGS_134_E20_F02"
[1] "saved sampleWGS_211_E20_F02"
[1] "saved sampleWGS_220_E20_F07"
[1] "saved sampleWGS_230_E20_F06"
[1] "saved sampleWGS_240_E20_F05"
[1] "saved sampleWGS_246_E20_F05"
[1] "saved sampleWGS_246_E20_F07"
[1] "saved sampleWGS_249_E20_F02"
[1] "saved sampleWGS_252_E20_F02"
[1] "saved sampleWGS_253_E20_F01"
[1] "saved sampleWGS_262_E20_F07"
[1] "saved sampleWGS_266_E20_F04"
[1] "saved sampleWGS_289_E20_F02"
[1] "saved sampleWGS_299_E20_F09"
[1] "saved sampleWGS_300_E20_F05"
[[1]]
NULL
[[2]]
NULL
[[3]]
NULL
[[4]]
NULL
[[5]]
NULL
[[6]]
NULL
[[7]]
NULL
[[8]]
NULL
[[9]]
NULL
[[10]]
NULL
[[11]]
NULL
[[12]]
NULL
[[13]]
NULL
[[14]]
NULL
[[15]]
NULL
[[16]]
NULL
[[17]]
NULL
[[18]]
NULL
[[19]]
NULL
[[20]]
NULL
[[21]]
NULL
[[22]]
NULL
[[23]]
NULL
[[24]]
NULL
[[25]]
NULL
[[26]]
NULL
[[27]]
NULL
[[28]]
NULL
[[29]]
NULL
[[30]]
NULL
[[31]]
NULL
[[32]]
NULL
[[33]]
NULL
[[34]]
NULL
[[35]]
NULL
#Add circos
#this script run for all the samples summed up for all genes. could be also done individually.
#png(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/Visualization_samples/R_CIRCOS_RIL/",df[["V2"]][1],".png"),width = 680,height = 680)
svg(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/RIL_mtDNA_numts_all_samples_circos.svg"),width = 9,height = 9)
numts_hits_df_merged <- do.call(rbind,result_lists_aaeg_genes_hit_with_symbol_counts)
numts_hits_df_merged_counted <- numts_hits_df_merged %>% dplyr::group_by(symbol) %>% dplyr::summarize(count=dplyr::n(),total_all_samples_sum=sum(col2_sum))
mt_genes_ae_2 <- mt_genes_ae
mt_genes_ae_2$start <- mt_genes_ae_2$start/100000
mt_genes_ae_2$end <- mt_genes_ae_2$end/100000
numts_hits_df_merged_counted_start_end <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>% dplyr::right_join(numts_hits_df_merged_counted,by = c("gene"="symbol")) %>% dplyr::mutate(value=total_all_samples_sum) %>% dplyr::select(chr,start,end,value,gene) %>% dplyr::arrange(start,end)
library(circlize)
ref_fd_ae <- data.frame("Chromosome"=c("chrM"),"ChromStart"=c(1),"Chromend"=c(16790))
#circos.genomicInitialize(ref_fd_ae)
circos.genomicInitialize(ref_fd_ae,plotType = NULL)
mt_genes_ae_2 <- mt_genes_ae
mt_genes_ae_2$start <- mt_genes_ae_2$start/100000
mt_genes_ae_2$end <- mt_genes_ae_2$end/100000
circos.genomicLabels(mt_genes_ae_2 ,labels.column = 5, cex=0.7,line_lwd=0.6, line_col="grey20", connection_height = 0.019,col=ifelse(numts_hits_df_merged_counted_start_end$value > mean(numts_hits_df_merged_counted_start_end$value),"red","black"),
labels_height = 0.2,niceFacing = TRUE,side = "outside")
# circos.genomicLabels(mt_genes_2 ,labels.column = 5, cex=0.5,line_lwd=0.6, line_col="grey20", connection_height = 0.05, labels_height = 0.2,niceFacing = TRUE,side = "outside")
circos.track(ylim=c(0, 1), panel.fun=function(x, y) {
chr=CELL_META$sector.index
xlim=CELL_META$xlim
ylim=CELL_META$ylim#
circos.text(mean(xlim), mean(ylim), chr, cex=0.8)}, bg.col = c("white"), bg.border=F, track.height=0.2) # the track height of the text specifies how thick is the track were we plot the bars
circos.track(track.index = get.current.track.index(),
track.height=0.8,
ylim=c(0,120), #this track y lim specifies how big are the bars up to in the track y lim
panel.fun = function(x, y) {
circos.genomicAxis(h = "bottom", direction = "inside",labels.cex = 0.6,tickLabelsStartFromZero = FALSE)})
# set_track_gap(gap = 0.05)
library(RColorBrewer)
green_pal <- colorRampPalette(brewer.pal(9, "Greens"))
map_value_to_color <- function(value) {
breaks <- c(0, 10, 20, 30, 40, 50, 70, 90,110 )
colors <- green_pal(9)
colors[findInterval(value, breaks)]
}
circos.rect(xleft = numts_hits_df_merged_counted_start_end$start,
ybottom = 5,
xright = numts_hits_df_merged_counted_start_end$end,
ytop = 5 + numts_hits_df_merged_counted_start_end$value,
#col = rgb(0.4, 0.7, 0.4, alpha = 0.5),
#col = ("#95D5B2"),
col = map_value_to_color(numts_hits_df_merged_counted_start_end$value),
#col = my_colors_scaled,
border = "black")
circos.lines(CELL_META$cell.xlim, c(max(numts_hits_df_merged_counted_start_end$value)+5, max(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "#A71246")
#circos.lines(CELL_META$cell.xlim, c(min(numts_hits_df_merged_counted_start_end$value)+5, min(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "blue")
circos.lines(CELL_META$cell.xlim, c(mean(numts_hits_df_merged_counted_start_end$value)+5, mean(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "#03071E")
dev.off()
null device
1
write.table(x = numts_hits_df_merged_counted_start_end,file = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/RIL_mtDNA_numts_all_samples_circos.csv"),sep = ",")
# numts_hits_df_merged_counted <- numts_hits_df_merged %>% dplyr::group_by(symbol) %>% dplyr::summarize(count=dplyr::n(),total_all_samples_sum=sum(col2_sum))
# mt_genes_ae_2 <- mt_genes_ae
# mt_genes_ae_2$start <- mt_genes_ae_2$start/100000
# mt_genes_ae_2$end <- mt_genes_ae_2$end/100000
# numts_hits_df_merged_counted_start_end <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>% dplyr::right_join(numts_hits_df_merged_counted,by = c("gene"="symbol")) %>% dplyr::mutate(value=total_all_samples_sum) %>% dplyr::select(chr,start,end,value,gene) %>% dplyr::arrange(start,end)
# svg(filename = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/knonwn_numts_circos_vs_RILs.svg",width = 9,height = 9)
library(circlize)
ref_fd_ae <- data.frame("Chromosome"=c("chrM"),"ChromStart"=c(1),"Chromend"=c(16790))
#circos.genomicInitialize(ref_fd_ae)
circos.genomicInitialize(ref_fd_ae,plotType = NULL)
mt_genes_ae_2 <- mt_genes_ae
mt_genes_ae_2$start <- mt_genes_ae_2$start/100000
mt_genes_ae_2$end <- mt_genes_ae_2$end/100000
circos.genomicLabels(mt_genes_ae_2 ,labels.column = 5, cex=0.7,line_lwd=0.6, line_col="grey20", connection_height = 0.019,col=ifelse(numts_hits_df_merged_counted_start_end$value > mean(numts_hits_df_merged_counted_start_end$value),"red","black"),
labels_height = 0.2,niceFacing = TRUE,side = "outside")
# circos.genomicLabels(mt_genes_2 ,labels.column = 5, cex=0.5,line_lwd=0.6, line_col="grey20", connection_height = 0.05, labels_height = 0.2,niceFacing = TRUE,side = "outside")
circos.track(ylim=c(0, 1), panel.fun=function(x, y) {
chr=CELL_META$sector.index
xlim=CELL_META$xlim
ylim=CELL_META$ylim#
circos.text(mean(xlim), mean(ylim), chr, cex=0.8)}, bg.col = c("white"), bg.border=F, track.height=0.2) # the track height of the text specifies how thick is the track were we plot the bars
circos.track(track.index = get.current.track.index(),
track.height=0.8,
ylim=c(0,120), #this track y lim specifies how big are the bars up to in the track y lim
panel.fun = function(x, y) {
circos.genomicAxis(h = "bottom", direction = "inside",labels.cex = 0.6,tickLabelsStartFromZero = FALSE)})
# set_track_gap(gap = 0.05)
library(RColorBrewer)
green_pal <- colorRampPalette(brewer.pal(9, "Greens"))
map_value_to_color <- function(value) {
breaks <- c(0, 10, 20, 30, 40, 50, 70, 90,110 )
colors <- green_pal(9)
colors[findInterval(value, breaks)]
}
circos.rect(xleft = numts_hits_df_merged_counted_start_end$start,
ybottom = 5,
xright = numts_hits_df_merged_counted_start_end$end,
ytop = 5 + numts_hits_df_merged_counted_start_end$value,
#col = rgb(0.4, 0.7, 0.4, alpha = 0.5),
#col = ("#95D5B2"),
col = map_value_to_color(numts_hits_df_merged_counted_start_end$value),
#col = my_colors_scaled,
border = "black")
circos.lines(CELL_META$cell.xlim, c(max(numts_hits_df_merged_counted_start_end$value)+5, max(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "#A71246")
#circos.lines(CELL_META$cell.xlim, c(min(numts_hits_df_merged_counted_start_end$value)+5, min(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "blue")
circos.lines(CELL_META$cell.xlim, c(mean(numts_hits_df_merged_counted_start_end$value)+5, mean(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "#03071E")
### Add the known numts
results_kn_numts_genes_hit_with_symbol_counts_start_end <- mt_genes_ae_2 %>% dplyr::select(chr,start,end,gene) %>%
dplyr::right_join(results_kn_numts_genes_hit_with_symbol_counts,by = c("gene"="symbol")) %>%
dplyr::mutate(value=col2_sum) %>%
dplyr::select(chr,start,end,value,gene) %>%
dplyr::arrange(start,end)
results_kn_numts_genes_hit_with_symbol_counts_start_end$value_scaled <- results_kn_numts_genes_hit_with_symbol_counts_start_end$value/10
reds_pal <- colorRampPalette(brewer.pal(9, "Reds"))
map_value_to_color <- function(value) {
breaks <- c(0, 10, 20, 30, 40, 50, 70, 90,110 )
colors <- reds_pal(9)
colors[findInterval(value, breaks)]
}
circos.track(track.height=0.2,
ylim=c(0,60))
circos.rect(xleft = results_kn_numts_genes_hit_with_symbol_counts_start_end$start,
ybottom = 2.5,
xright = results_kn_numts_genes_hit_with_symbol_counts_start_end$end,
ytop = 2.5 + results_kn_numts_genes_hit_with_symbol_counts_start_end$value,
#col = rgb(0.4, 0.7, 0.4, alpha = 0.5),
#col = ("#95D5B2"),
col = map_value_to_color(results_kn_numts_genes_hit_with_symbol_counts_start_end$value),
#col = my_colors_scaled,
border = "black")
circos.lines(CELL_META$cell.xlim, c(max(results_kn_numts_genes_hit_with_symbol_counts_start_end$value)+2.5,
max(results_kn_numts_genes_hit_with_symbol_counts_start_end$value)+2.5),lwd = 2, lty = 2, col = "#A71246")
#circos.lines(CELL_META$cell.xlim, c(min(numts_hits_df_merged_counted_start_end$value)+5, min(numts_hits_df_merged_counted_start_end$value)+5),lwd = 2, lty = 2, col = "blue")
circos.lines(CELL_META$cell.xlim, c(mean(results_kn_numts_genes_hit_with_symbol_counts_start_end$value)+2.5,
mean(results_kn_numts_genes_hit_with_symbol_counts_start_end$value)+2.5),lwd = 2, lty = 2, col = "#03071E")
###Here the last track of known numts are reported divided by 60 in the AREA to plot rather than 120 as in our numts.
### the added values are reduced to half in order to "Keep it proportional"
# dev.off()
write.table(results_kn_numts_genes_hit_with_symbol_counts_start_end,file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/knonwn_numts_preprocess_3_table_circos_vs_RILs.csv",sep = ",")
#processed hits in the genes used for the knonwn or reported numts.
write.table(results_kn_numts_genes_hit_with_symbol,file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/knonwn_numts_overlapping_trnas_genes_preprocess_1_before_circos.csv.csv",sep = ",")
#processed hits in the genes used for the knonwn or reported numts, counted by gene or tRNA.
write.table(results_kn_numts_genes_hit_with_symbol_counts, file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Table/knonwn_numts_overlapping_trnas_genes_preprocess_2_before_circos.csv",sep = ",")
# Create sample data with one column "value" ranging from 0 to 100
data <- data.frame(value = 0:max(results_kn_numts_genes_hit_with_symbol_counts_start_end$value))
# Create a bar plot with gradient fill
# svg(filename = paste0("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/Plots/known_numts_vs_RIL_circos_red_LEGEND.svg"),width = 3,height = 4 )
ggplot(data, aes(x = 1, y = value, fill = value)) +
geom_bar(stat = "identity", width = 1) +
scale_fill_gradientn(colours = brewer.pal(9, "Reds"),guide = "legend") +
coord_flip() +
theme_void()
# dev.off()
require(dplyr)
genes <- mt_genes_ae_2 %>% dplyr::select(c(gene,start,end))
genes
result_lists_aaeg_genes_hit_with_symbol_counts$WGS_017_E20_F03$symbol
unified_allMTdna_all_RIL <- lapply(names(result_lists_aaeg_genes_hit_with_symbol_counts), function(nm) result_lists_aaeg_genes_hit_with_symbol_counts[[nm]] %>% right_join(genes,by = c("symbol" = "gene")) %>% dplyr::select(c(symbol,col2_sum)))
names(unified_allMTdna_all_RIL) <- names(result_lists_aaeg_genes_hit_with_symbol_counts)
colnames(unified_allMTdna_all_RIL[["WGS_017_E20_F03"]])
#names returns only the names
#unified_allMTdna_all_RIL_2 <- lapply(names(unified_allMTdna_all_RIL),function(nm) colnames(unified_allMTdna_all_RIL[[nm]]) <- c("symbol",nm))
#setNames returns the object.
unified_allMTdna_all_RIL_2 <- lapply(names(unified_allMTdna_all_RIL), function(nm) setNames(object = unified_allMTdna_all_RIL[[nm]],nm = c("symbol",nm)))
names(unified_allMTdna_all_RIL_2) <- names(result_lists_aaeg_genes_hit_with_symbol_counts)
for (i in names(unified_allMTdna_all_RIL_2)) {
print(i)
unified_allMTdna_all_RIL_2[[i]][i][is.na(unified_allMTdna_all_RIL_2[[i]][i])] <- 0
}
unified_allMTdna_all_RIL_2_df <- do.call(cbind,unified_allMTdna_all_RIL_2)
unified_allMTdna_all_RIL_2_df$ID <- unified_allMTdna_all_RIL_2_df$WGS_017_E20_F03.symbol
unified_allMTdna_all_RIL_2_df <- subset(unified_allMTdna_all_RIL_2_df,select = -grep(pattern = "*.symbol$",names(unified_allMTdna_all_RIL_2_df)))
unified_allMTdna_all_RIL_2_mt <- as.matrix(unified_allMTdna_all_RIL_2_df[1:35])
unified_allMTdna_all_RIL_2_mt
rownames(unified_allMTdna_all_RIL_2_mt) <- unified_allMTdna_all_RIL_2_df$ID
#unified_allMTdna_all_RIL_2 <- lapply(names(unified_allMTdna_all_RIL_2),function(dfnm) unified_allMTdna_all_RIL_2[[dfnm]] %>% mutate(dfnm = coalesce(dfnm,0)))
#performs vby default the variation between the columns in each of the rows...
#principal components based on the variation between the samples in each of the 35genes
#t() principal components based on the variation between the genes across the 35 samples
pca_ril <- prcomp(unified_allMTdna_all_RIL_2_mt)
percentVar_obj <- pca_ril$sdev^2/sum(pca_ril$sdev^2)
#pull PCA values out of the PCA object #Save as a df
pca_df = as.data.frame(pca_ril[2]$rotation)
pca_df$Sample <- rownames(pca_df)
require(ggplot2)
pca_plot = ggplot(data = pca_df,aes(x=PC1, y=PC2,color=Sample)) + geom_jitter() +
labs(title="PCA RIL",
subtitle = "not scaled") +
xlab(paste0("PC",substr("PC1",start = 3,stop = 3),": ",
round(percentVar_obj[as.numeric(substr("PC1",start = 3,stop = 3))] * 100), "% variance")) +
ylab(paste0("PC",substr("PC2",start = 3,stop = 3),": ",
round(percentVar_obj[as.numeric(substr("PC2",start = 3,stop = 3))] * 100), "% variance"))
pca_plot
<!-- rnb-text-end -->
<!-- rnb-chunk-begin -->
<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuc2F2ZS5pbWFnZShmaWxlID0gXCIvaG9tZS9ib3Rvcy9TVlJBVzEvYm90b3MvMjAyM18wMl8wOF9OVU1UUy9maW5hbF9udW10cy9SL251bXRzX1JfcHJvY2Vzcy5SRGF0YVwiLGNvbXByZXNzID0gXCJnemlwXCIpXG5gYGAifQ== -->
```r
save.image(file = "/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/R/numts_R_process.RData",compress = "gzip")
load("/home/botos/SVRAW1/botos/2023_02_08_NUMTS/final_numts/R/numts_R_process.RData")